From cee6b8475570c82cd0a485a7f30fba6aa4c69d3c Mon Sep 17 00:00:00 2001
From: iamahuman <iamahuman@users.noreply.github.com>
Date: Mon, 29 Apr 2019 16:37:21 +0900
Subject: [PATCH] Synchronize code with upstream commonmark.js 0.29

 * Updated code to match commonmark/commonmark.js@5eebfd3868b812abb2ba5830ca0cc6c9b638add7
 * Improved block dispatch performance by using a pre-cached dict instead of importlib
 * Added missing Unicode case folding for reference normalization
 * Fixed a bug where empty link labels were not properly recognized
---
 commonmark/blocks.py              |  87 ++---
 commonmark/common.py              |  19 +-
 commonmark/inlines.py             |  68 ++--
 commonmark/node.py                |   1 +
 commonmark/normalize_reference.py | 165 +++++++++
 commonmark/render/html.py         |  16 +-
 commonmark/tests/unit_tests.py    |  28 +-
 commonmark/utils.py               |   3 -
 spec.txt                          | 548 +++++++++++++++++++++++-------
 9 files changed, 700 insertions(+), 235 deletions(-)
 create mode 100644 commonmark/normalize_reference.py
 delete mode 100644 commonmark/utils.py

diff --git a/commonmark/blocks.py b/commonmark/blocks.py
index fa236a8..aeec21d 100644
--- a/commonmark/blocks.py
+++ b/commonmark/blocks.py
@@ -1,12 +1,10 @@
 from __future__ import absolute_import, unicode_literals
 
 import re
-from importlib import import_module
 from commonmark import common
 from commonmark.common import unescape_string
 from commonmark.inlines import InlineParser
 from commonmark.node import Node
-from commonmark.utils import to_camel_case
 
 
 CODE_INDENT = 4
@@ -21,7 +19,7 @@
         r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
         r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
         r'fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|'
-        r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|'
+        r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|'
         r'nav|noframes|ol|optgroup|option|p|param|section|source|title|'
         r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
         r'(?:\s|[/]?[>]|$)',
@@ -45,7 +43,7 @@
 reBulletListMarker = re.compile(r'^[*+-]')
 reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')
 reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')
-reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')
+reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}')
 reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')
 reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')
 reLineEnding = re.compile(r'\r\n|\n|\r')
@@ -57,7 +55,7 @@ def is_blank(s):
 
 
 def is_space_or_tab(s):
-    return s == ' ' or s == '\t'
+    return s in (' ', '\t')
 
 
 def peek(ln, pos):
@@ -73,9 +71,12 @@ def ends_with_blank_line(block):
     while block:
         if block.last_line_blank:
             return True
-        if (block.t == 'list' or block.t == 'item'):
+        if not block.last_line_checked and \
+                block.t in ('list', 'item'):
+            block.last_line_checked = True
             block = block.last_child
         else:
+            block.last_line_checked = True
             break
 
     return False
@@ -94,6 +95,8 @@ def parse_list_marker(parser, container):
         'padding': None,
         'marker_offset': parser.indent,
     }
+    if parser.indent >= 4:
+        return None
     m = re.search(reBulletListMarker, rest)
     m2 = re.search(reOrderedListMarker, rest)
     if m:
@@ -515,15 +518,25 @@ def setext_heading(parser, container=None):
                 parser.current_line[parser.next_nonspace:])
             if m:
                 parser.close_unmatched_blocks()
-                heading = Node('heading', container.sourcepos)
-                heading.level = 1 if m.group()[0] == '=' else 2
-                heading.string_content = container.string_content
-                container.insert_after(heading)
-                container.unlink()
-                parser.tip = heading
-                parser.advance_offset(
-                    len(parser.current_line) - parser.offset, False)
-                return 2
+                # resolve reference link definitions
+                while peek(container.string_content, 0) == '[':
+                    pos = parser.inline_parser.parseReference(
+                            container.string_content, parser.refmap)
+                    if not pos:
+                        break
+                    container.string_content = container.string_content[pos:]
+                if container.string_content:
+                    heading = Node('heading', container.sourcepos)
+                    heading.level = 1 if m.group()[0] == '=' else 2
+                    heading.string_content = container.string_content
+                    container.insert_after(heading)
+                    container.unlink()
+                    parser.tip = heading
+                    parser.advance_offset(
+                        len(parser.current_line) - parser.offset, False)
+                    return 2
+                else:
+                    return 0
 
         return 0
 
@@ -610,13 +623,8 @@ def add_child(self, tag, offset):
         """ Add block of type tag as a child of the tip.  If the tip can't
         accept children, close and finalize it and try its parent,
         and so on til we find a block that can accept children."""
-        block_class = getattr(import_module('commonmark.blocks'),
-                              to_camel_case(self.tip.t))
-        while not block_class.can_contain(tag):
+        while not self.blocks[self.tip.t].can_contain(tag):
             self.finalize(self.tip, self.line_number - 1)
-            block_class = getattr(
-                import_module('commonmark.blocks'),
-                to_camel_case(self.tip.t))
 
         column_number = offset + 1
         new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
@@ -725,15 +733,15 @@ def incorporate_line(self, ln):
         # For each containing block, try to parse the associated line start.
         # Bail out on failure: container will point to the last matching block.
         # Set all_matched to false if not all containers match.
-        last_child = container.last_child
-        while last_child and last_child.is_open:
+        while True:
+            last_child = container.last_child
+            if not (last_child and last_child.is_open):
+                break
             container = last_child
 
             self.find_next_nonspace()
-            block_class = getattr(
-                import_module('commonmark.blocks'),
-                to_camel_case(container.t))
-            rv = block_class.continue_(self, container)
+
+            rv = self.blocks[container.t].continue_(self, container)
             if rv == 0:
                 # we've matched, keep going
                 pass
@@ -745,21 +753,19 @@ def incorporate_line(self, ln):
                 self.last_line_length = len(ln)
                 return
             else:
-                raise ValueError('returned illegal value, must be 0, 1, or 2')
+                raise ValueError(
+                        'continue_ returned illegal value, must be 0, 1, or 2')
 
             if not all_matched:
                 # back up to last matching block
                 container = container.parent
                 break
 
-            last_child = container.last_child
-
         self.all_closed = (container == self.oldtip)
         self.last_matched_container = container
 
-        block_class = getattr(import_module('commonmark.blocks'),
-                              to_camel_case(container.t))
-        matched_leaf = container.t != 'paragraph' and block_class.accepts_lines
+        matched_leaf = container.t != 'paragraph' and \
+            self.blocks[container.t].accepts_lines
         starts = self.block_starts
         starts_len = len(starts.METHODS)
         # Unless last matched container is a code block, try new container
@@ -824,9 +830,7 @@ def incorporate_line(self, ln):
                 cont.last_line_blank = last_line_blank
                 cont = cont.parent
 
-            block_class = getattr(import_module('commonmark.blocks'),
-                                  to_camel_case(t))
-            if block_class.accepts_lines:
+            if self.blocks[t].accepts_lines:
                 self.add_line()
                 # if HtmlBlock, check for end condition
                 if t == 'html_block' and \
@@ -853,9 +857,8 @@ def finalize(self, block, line_number):
         above = block.parent
         block.is_open = False
         block.sourcepos[1] = [line_number, self.last_line_length]
-        block_class = getattr(import_module('commonmark.blocks'),
-                              to_camel_case(block.t))
-        block_class.finalize(self, block)
+
+        self.blocks[block.t].finalize(self, block)
 
         self.tip = above
 
@@ -897,3 +900,9 @@ def parse(self, my_input):
             self.finalize(self.tip, length)
         self.process_inlines(self.doc)
         return self.doc
+
+
+CAMEL_RE = re.compile("(.)([A-Z](?:[a-z]+|(?<=[a-z0-9].)))")
+Parser.blocks = dict(
+    (CAMEL_RE.sub(r'\1_\2', cls.__name__).lower(), cls)
+    for cls in Block.__subclasses__())
diff --git a/commonmark/common.py b/commonmark/common.py
index 1327869..b15a8b6 100644
--- a/commonmark/common.py
+++ b/commonmark/common.py
@@ -19,7 +19,7 @@
     from commonmark import entitytrans
     HTMLunescape = entitytrans._unescape
 
-ENTITY = '&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});'
+ENTITY = '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});'
 
 TAGNAME = '[A-Za-z][A-Za-z0-9-]*'
 ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'
@@ -45,7 +45,6 @@
     '\\\\' + ESCAPABLE + '|' + ENTITY, re.IGNORECASE)
 XMLSPECIAL = '[&<>"]'
 reXmlSpecial = re.compile(XMLSPECIAL)
-reXmlSpecialOrEntity = re.compile(ENTITY + '|' + XMLSPECIAL, re.IGNORECASE)
 
 
 def unescape_char(s):
@@ -102,19 +101,13 @@ def replace_unsafe_char(s):
     return UNSAFE_MAP.get(s, s)
 
 
-def escape_xml(s, preserve_entities):
+def escape_xml(s):
     if s is None:
         return ''
     if re.search(reXmlSpecial, s):
-        if preserve_entities:
-            return re.sub(
-                reXmlSpecialOrEntity,
-                lambda m: replace_unsafe_char(m.group()),
-                s)
-        else:
-            return re.sub(
-                reXmlSpecial,
-                lambda m: replace_unsafe_char(m.group()),
-                s)
+        return re.sub(
+            reXmlSpecial,
+            lambda m: replace_unsafe_char(m.group()),
+            s)
     else:
         return s
diff --git a/commonmark/inlines.py b/commonmark/inlines.py
index f2f66c7..88a84cf 100644
--- a/commonmark/inlines.py
+++ b/commonmark/inlines.py
@@ -5,6 +5,7 @@
 from commonmark import common
 from commonmark.common import normalize_uri, unescape_string
 from commonmark.node import Node
+from commonmark.normalize_reference import normalize_reference
 
 if sys.version_info >= (3, 0):
     if sys.version_info >= (3, 4):
@@ -22,7 +23,7 @@
 ESCAPED_CHAR = '\\\\' + common.ESCAPABLE
 
 rePunctuation = re.compile(
-    r'[!"#$%&\'()*+,\-./:;<=>?@\[\]^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB'
+    r'[!"#$%&\'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB'
     r'\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3'
     r'\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F'
     r'\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E'
@@ -54,10 +55,8 @@
     '|' +
     '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
     '|' +
-    '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))')
-reLinkDestinationBraces = re.compile(
-    '^(?:[<](?:[^ <>\\t\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' +
-    '\\\\)*[>])')
+    '\\((' + ESCAPED_CHAR + '|[^()\\x00])*\\))')
+reLinkDestinationBraces = re.compile(r'^(?:<(?:[^<>\n\\\x00]|\\.)*>)')
 
 reEscapable = re.compile('^' + common.ESCAPABLE)
 reEntityHere = re.compile('^' + common.ENTITY, re.IGNORECASE)
@@ -79,21 +78,11 @@
 reFinalSpace = re.compile(r' *$')
 reInitialSpace = re.compile(r'^ *')
 reSpaceAtEndOfLine = re.compile(r'^ *(?:\n|$)')
-reLinkLabel = re.compile('^\\[(?:[^\\\\\\[\\]]|' + ESCAPED_CHAR +
-                         '|\\\\){0,1000}\\]')
+reLinkLabel = re.compile(r'^\[(?:[^\\\[\]]|\\.){0,1000}\]')
 # Matches a string of non-special characters.
 reMain = re.compile(r'^[^\n`\[\]\\!<&*_\'"]+', re.MULTILINE)
 
 
-def normalizeReference(s):
-    """Normalize reference label.
-
-    Collapse internal whitespace to single space, remove
-    leading/trailing whitespace, case fold.
-    """
-    return re.sub(r'\s+', ' ', s.strip()).upper()
-
-
 def text(s):
     node = Node('text', None)
     node.literal = s
@@ -175,12 +164,14 @@ def parseBackticks(self, block):
         after_open_ticks = self.pos
         matched = self.match(reTicks)
         while matched is not None:
-            if (matched == ticks):
+            if matched == ticks:
                 node = Node('code', None)
-                c = self.subject[after_open_ticks:self.pos - len(ticks)]
-                c = c.strip()
-                c = re.sub(reWhitespace, ' ', c)
-                node.literal = c
+                contents = self.subject[after_open_ticks:self.pos-len(ticks)] \
+                    .replace('\n', ' ')
+                if contents.lstrip(' ') and contents[0] == contents[-1] == ' ':
+                    node.literal = contents[1:-1]
+                else:
+                    node.literal = contents
                 block.append_child(node)
                 return True
             matched = self.match(reTicks)
@@ -394,8 +385,9 @@ def processEmphasis(self, stack_bottom):
                        opener != openers_bottom[closercc]):
                     odd_match = (closer.get('can_open') or
                                  opener.get('can_close')) and \
-                                 (opener.get('origdelims') +
-                                  closer.get('origdelims')) % 3 == 0
+                                 closer['origdelims'] % 3 != 0 and \
+                                 (opener['origdelims'] +
+                                  closer['origdelims']) % 3 == 0
                     if opener.get('cc') == closercc and \
                        opener.get('can_open') and \
                        not odd_match:
@@ -502,12 +494,17 @@ def parseLinkDestination(self):
         """
         res = self.match(reLinkDestinationBraces)
         if res is None:
+            if self.peek() == '<':
+                return None
             # TODO handrolled parser; res should be None or the string
             savepos = self.pos
             openparens = 0
-            c = self.peek()
-            while c is not None:
-                if c == '\\':
+            while True:
+                c = self.peek()
+                if c is None:
+                    break
+                if c == '\\' and re.search(
+                        reEscapable, self.subject[self.pos+1:self.pos+2]):
                     self.pos += 1
                     if self.peek() is not None:
                         self.pos += 1
@@ -524,7 +521,8 @@ def parseLinkDestination(self):
                     break
                 else:
                     self.pos += 1
-                c = self.peek()
+            if self.pos == savepos and c != ')':
+                return None
             res = self.subject[savepos:self.pos]
             return normalize_uri(unescape_string(res))
         else:
@@ -539,7 +537,7 @@ def parseLinkLabel(self):
         # Note: our regex will allow something of form [..\];
         # we disallow it here rather than using lookahead in the regex:
         m = self.match(reLinkLabel)
-        if m is None or len(m) > 1001 or re.search(r'([^\\]\\\]$|\[\n\]$)', m):
+        if m is None or len(m) > 1001:
             return 0
         else:
             return len(m)
@@ -647,7 +645,7 @@ def parseCloseBracket(self, block):
 
             if reflabel:
                 # lookup rawlabel in refmap
-                link = self.refmap.get(normalizeReference(reflabel))
+                link = self.refmap.get(normalize_reference(reflabel))
                 if link:
                     dest = link['destination']
                     title = link['title']
@@ -779,13 +777,15 @@ def parseReference(self, s, refmap):
         self.spnl()
 
         dest = self.parseLinkDestination()
-        if (dest is None or len(dest) == 0):
+        if dest is None:
             self.pos = startpos
             return 0
 
         beforetitle = self.pos
         self.spnl()
-        title = self.parseLinkTitle()
+        title = None
+        if self.pos != beforetitle:
+            title = self.parseLinkTitle()
         if title is None:
             title = ''
             # rewind before spaces
@@ -810,13 +810,13 @@ def parseReference(self, s, refmap):
             self.pos = startpos
             return 0
 
-        normlabel = normalizeReference(rawlabel)
-        if refmap.get(normlabel) == '':
+        normlabel = normalize_reference(rawlabel)
+        if normlabel == '':
             # label must contain non-whitespace characters
             self.pos = startpos
             return 0
 
-        if refmap.get(normlabel) is None:
+        if not refmap.get(normlabel):
             refmap[normlabel] = {
                 'destination': dest,
                 'title': title
diff --git a/commonmark/node.py b/commonmark/node.py
index 39e26b5..4c0ed40 100644
--- a/commonmark/node.py
+++ b/commonmark/node.py
@@ -78,6 +78,7 @@ def __init__(self, node_type, sourcepos):
         self.nxt = None
         self.sourcepos = sourcepos
         self.last_line_blank = False
+        self.last_line_checked = False
         self.is_open = True
         self.string_content = ''
         self.literal = None
diff --git a/commonmark/normalize_reference.py b/commonmark/normalize_reference.py
new file mode 100644
index 0000000..d68a3b1
--- /dev/null
+++ b/commonmark/normalize_reference.py
@@ -0,0 +1,165 @@
+"""Case-folding and whitespace normalization"""
+# Unicode Case Folding table has been derived from the following work:
+#
+#   CaseFolding-12.0.0.txt
+#   Date: 2019-01-22, 08:18:22 GMT
+#   (c) 2019 Unicode(R) Inc.
+#   Unicode and the Unicode Logo are registered trademarks
+#   of Unicode, Inc. in the U.S. and other countries.
+#   For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#   Unicode Character Database
+#     For documentation, see http://www.unicode.org/reports/tr44/
+
+import re
+import sys
+from builtins import str, chr
+
+__all__ = ["normalize_reference"]
+
+if sys.version_info < (3,) and sys.maxunicode <= 0xffff:
+    # shim for Python 2.x UCS2 build
+    _unichr = chr
+
+    def chr(cdp):
+        if 0x10000 <= cdp < 0x110000:
+            cdp -= 0x10000
+            return (_unichr(0xd800 | (cdp >> 10)) +
+                    _unichr(0xdc00 | (cdp & 0x3ff)))
+        return _unichr(cdp)
+
+
+def _parse_table(tbl):
+    xlat = {}
+    cur_i, cur_j = -1, 0
+    for entry in tbl.split(';'):
+        arr = entry.split(',')
+        info = [int(x, 36) if x else 0 for x in arr[0].split(':')]
+        arr = [int(x, 36) for x in arr[1:]]
+        assert not any(x in xlat for x in arr)
+        sfx = ''.join(map(chr, arr))
+        streak, stride = 0, 1
+        if len(info) == 2:
+            fdt, delta = info
+        elif len(info) == 3:
+            fdt, streak, delta = info
+        else:
+            fdt, streak, delta, stride = info
+        assert streak >= 0 and stride >= 1
+        cur_i += fdt + 1
+        cur_j -= delta
+        assert cur_j != 0
+        i = cur_i
+        last = cur_i + streak
+        while i <= last:
+            # uniqueness and idempotency
+            assert i not in xlat and i + cur_j not in xlat
+            assert i not in arr
+            xlat[i] = chr(i + cur_j) + sfx
+            i += stride
+    return xlat
+
+
+XLAT = _parse_table(
+    # ===== Start of Unicode Case Folding table =====
+    '1t:p:-w;37:-kn;a:m:kn;n:6:;6:3w,37;w:1a:-31:2;1b:5k,lj;1:4:-5k:2;6:e::'
+    '2;f:-aa,32;:18:aa:2;19:3e;:4:-3e:2;5:7h;1:-da;:2:5t:2;3:-5p;:5p;1:1:-5'
+    'o;1:5o;2:-26;:-3f;:-1;:5m;1:-5o;:-2;1:-4;:2;:5s;3:-5u;:-2;1:-1;:4:5x:2'
+    ';5:-61;:61;1:-61;2:61;1:-61;:61;1:1:-60;1:2:60:2;3:-62;:4:62:4;b:-1;:1'
+    ';1:-1;:1;1:-1;:g:1:2;i:g::2;h:av,lo;:-aw;:2:1:2;3:2q;:-15;:12:-1l:2;13'
+    ':3n;1:g:-3n:2;n:-8bu;:8bu;1:4k;:-8gb;2:8br;1:5g;:-7c;:-2;:8:1y:2;72:-3'
+    '7;16:2:37:2;5:;8:-37;6:26;1:2:1;3:-r;1:1:1;1:m,lk,ld;:g:9;h:8:;c:b,lk,'
+    'ld;h:k;c:-7;:12;:-5;3:-a;:7;1:m:-n:2;n:1j;:-6;2:c;:4;1:-1t;1:8;:-8;2:2'
+    ':3n;2:f:-5u;f:v:1c;27:w:v:2;15:1g::2;1h:-e;:c:e:2;e:2m::2;2o:11:-1b;2d'
+    ':2a,136;26w:11:-5mq;12:6::6;mo:5:5m0;1on:4sm;:-1;:-9;:1:-2;1:1;:-7;:-o'
+    ';:-vzb;7:16:tj7;18:2:;8y:44:-2bl:2;45:5yn,mp;:-b,lk;:-2,lm;:-1,lm;:p,j'
+    'i;:-5xb;2:5wx,37;1:2m:-5yk:2;2v:7:9;f:5:;f:7:;f:7:;f:5:;7:5fn,lv;1:2,l'
+    'v,lc;1:2,lv,ld;1:2,lv,n6;2:6:-5ft:2;e:7:;n:7:3c,qh;7:7:8,qh;7:7:-o,qh;'
+    '7:7:8,qh;7:7:-1k,qh;7:7:8,qh;9:-6,qh;:5hc,qh;:6,qh;1:-3,n6;:1,n6,qh;:1'
+    ':-5j2;1:1:1u;1:5hd,qh;1:-6;3:-5h3,qh;:5ha,qh;:a,qh;1:-7,n6;:1,n6,qh;:3'
+    ':-5h6;3:5hb,qh;5:4,lk,lc;:1,lk,ld;2:3,n6;:1,lk,n6;:1:-5jq;1:1:2k;7:5h5'
+    ',lk,lc;:1,lk,ld;:5,lv;1:-2,n6;:1,lk,n6;:1:-5ju;1:1:2w;1:-2x;5:33,qh;:5'
+    'h0,qh;:-4,qh;1:7,n6;:1,n6,qh;:1:-5gu;1:1:-2;1:5h1,qh;89:8a;3:o2;:-3d;6'
+    ':-6ea;19:f:c;y:f;mq:p:-p;1ft:1a:-m;2n:1b;1:8ag;:-5ch;:5c1;2:4:-8a0:2;5'
+    ':8bh;:-v;:y;:-1;1:3:-8bj:3;b:1:8cg;1:2q:-8cg:2;2y:2::2;6:nym::nym;nyn:'
+    '16::2;1p:q::2;4h:c::2;f:1o::2;1y:2::2;3:r9h;:8:-r9h:2;c:;1:wmh;2:2:-wm'
+    'h:2;5:i::2;j:wn9;:b;:-4;:-a;:3;1:-1e;:o;:-l;:-xbp;:a:pr:2;d:;1:1d;:wlv'
+    ';:-5cb;q1:27:2oo;fpr:jii,2u;:1,2x;:1,30;:1,2u,2x;:1,2u,30;:-c,38;:1,38'
+    ';c:-z8,12u;:1,12d;:1,12j;:-9,12u;:b,12l;sp:p:-1cjn;ym:13:-8;4v:z:;1jj:'
+    '1e:-o;2e7:v:w;gwv:v:;o8v:x:-2'
+    # ===== End of Unicode Case Folding table =====
+)
+
+
+def _check_native(tbl):
+    """
+    Determine if Python's own native implementation
+    subsumes the supplied case folding table
+    """
+    try:
+        for i in tbl:
+            stv = chr(i)
+            if stv.casefold() == stv:
+                return False
+    except AttributeError:
+        return False
+    return True
+
+
+# Hoist version check out of function for performance
+SPACE_RE = re.compile(r'[ \t\r\n]+')
+if _check_native(XLAT):
+    def normalize_reference(string):
+        """
+        Normalize reference label: collapse internal whitespace
+        to single space, remove leading/trailing whitespace, case fold.
+        """
+        return SPACE_RE.sub(' ', string[1:-1].strip()).casefold()
+elif sys.version_info >= (3,) or sys.maxunicode > 0xffff:
+    def normalize_reference(string):
+        """
+        Normalize reference label: collapse internal whitespace
+        to single space, remove leading/trailing whitespace, case fold.
+        """
+        return SPACE_RE.sub(' ', string[1:-1].strip()).translate(XLAT)
+else:
+    def _get_smp_regex():
+        xls = sorted(x - 0x10000 for x in XLAT if x >= 0x10000)
+        xls.append(-1)
+        fmt, (dsh, opn, pip, cse) = str('\\u%04x'), str('-[|]')
+        rga, srk, erk = [str(r'[ \t\r\n]+')], 0, -2
+        for k in xls:
+            new_hir = (erk ^ k) >> 10 != 0
+            if new_hir or erk + 1 != k:
+                if erk >= 0 and srk != erk:
+                    if srk + 1 != erk:
+                        rga.append(dsh)
+                    rga.append(fmt % (0xdc00 + (erk & 0x3ff)))
+                if new_hir:
+                    if erk >= 0:
+                        rga.append(cse)
+                    if k < 0:
+                        break
+                    rga.append(pip)
+                    rga.append(fmt % (0xd800 + (k >> 10)))
+                    rga.append(opn)
+                srk = k
+                rga.append(fmt % (0xdc00 + (srk & 0x3ff)))
+            erk = k
+        return re.compile(str().join(rga))
+
+    def _subst_handler(matchobj):
+        src = matchobj.group(0)
+        hiv = ord(src[0])
+        if hiv < 0xd800:
+            return ' '
+        return XLAT[0x10000 + ((hiv & 0x3ff) << 10) | (ord(src[1]) & 0x3ff)]
+
+    SMP_RE = _get_smp_regex()
+
+    def normalize_reference(string):
+        """
+        Normalize reference label: collapse internal whitespace
+        to single space, remove leading/trailing whitespace, case fold.
+        """
+        return SMP_RE.sub(_subst_handler, string[1:-1].strip()).translate(XLAT)
diff --git a/commonmark/render/html.py b/commonmark/render/html.py
index 66612f7..b4ea345 100644
--- a/commonmark/render/html.py
+++ b/commonmark/render/html.py
@@ -29,8 +29,8 @@ def __init__(self, options={}):
         self.last_out = '\n'
         self.options = options
 
-    def escape(self, text, preserve_entities):
-        return escape_xml(text, preserve_entities)
+    def escape(self, text):
+        return escape_xml(text)
 
     def tag(self, name, attrs=None, selfclosing=None):
         """Helper function to produce an HTML tag."""
@@ -65,10 +65,10 @@ def link(self, node, entering):
         if entering:
             if not (self.options.get('safe') and
                     potentially_unsafe(node.destination)):
-                attrs.append(['href', self.escape(node.destination, True)])
+                attrs.append(['href', self.escape(node.destination)])
 
             if node.title:
-                attrs.append(['title', self.escape(node.title, True)])
+                attrs.append(['title', self.escape(node.title)])
 
             self.tag('a', attrs)
         else:
@@ -82,14 +82,14 @@ def image(self, node, entering):
                     self.lit('<img src="" alt="')
                 else:
                     self.lit('<img src="' +
-                             self.escape(node.destination, True) +
+                             self.escape(node.destination) +
                              '" alt="')
             self.disable_tags += 1
         else:
             self.disable_tags -= 1
             if self.disable_tags == 0:
                 if node.title:
-                    self.lit('" title="' + self.escape(node.title, True))
+                    self.lit('" title="' + self.escape(node.title))
                 self.lit('" />')
 
     def emph(self, node, entering):
@@ -132,7 +132,7 @@ def code_block(self, node, entering):
         attrs = self.attrs(node)
         if len(info_words) > 0 and len(info_words[0]) > 0:
             attrs.append(['class', 'language-' +
-                          self.escape(info_words[0], True)])
+                          self.escape(info_words[0])])
 
         self.cr()
         self.tag('pre')
@@ -214,7 +214,7 @@ def custom_block(self, node, entering):
     # Helper methods #
 
     def out(self, s):
-        self.lit(self.escape(s, False))
+        self.lit(self.escape(s))
 
     def attrs(self, node):
         att = []
diff --git a/commonmark/tests/unit_tests.py b/commonmark/tests/unit_tests.py
index aebcfbb..e9efef9 100644
--- a/commonmark/tests/unit_tests.py
+++ b/commonmark/tests/unit_tests.py
@@ -25,7 +25,6 @@ def text():
 from commonmark.render.html import HtmlRenderer
 from commonmark.inlines import InlineParser
 from commonmark.node import NodeWalker, Node
-from commonmark.utils import to_camel_case
 
 
 class TestCommonmark(unittest.TestCase):
@@ -100,6 +99,22 @@ def test_smart_dashes(self):
         html = renderer.render(ast)
         self.assertEqual(html, expected_html)
 
+    def test_regex_vulnerability_link_label(self):
+        i = 200
+        while i <= 2000:
+            s = commonmark.commonmark('[' + ('\\' * i) + '\n')
+            self.assertEqual(s, '<p>' + '[' + ('\\' * (i // 2)) + '</p>\n',
+                             '[\\\\... %d deep' % (i,))
+            i *= 10
+
+    def test_regex_vulnerability_link_destination(self):
+        i = 200
+        while i <= 2000:
+            s = commonmark.commonmark(('[](' * i) + '\n')
+            self.assertEqual(s, '<p>' + ('[](' * i) + '</p>\n',
+                             '[]( %d deep' % (i,))
+            i *= 10
+
 
 class TestHtmlRenderer(unittest.TestCase):
     def test_init(self):
@@ -138,16 +153,5 @@ def test_text(self, s):
         self.parser.parse(s)
 
 
-class TestUtils(unittest.TestCase):
-    def test_to_camel_case(self):
-        self.assertEqual(to_camel_case('snake_case'), 'SnakeCase')
-        self.assertEqual(to_camel_case(''), '')
-        self.assertEqual(to_camel_case('word'), 'Word')
-
-    @given(text())
-    def test_random_text(self, s):
-        to_camel_case(s)
-
-
 if __name__ == '__main__':
     unittest.main()
diff --git a/commonmark/utils.py b/commonmark/utils.py
deleted file mode 100644
index 7ea2584..0000000
--- a/commonmark/utils.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def to_camel_case(snake_str):
-    components = snake_str.split('_')
-    return ''.join(x.title() for x in components)
diff --git a/spec.txt b/spec.txt
index 9fd5841..3913de4 100644
--- a/spec.txt
+++ b/spec.txt
@@ -1,8 +1,8 @@
 ---
 title: CommonMark Spec
 author: John MacFarlane
-version: 0.28
-date: '2017-08-01'
+version: 0.29
+date: '2019-04-06'
 license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
 ...
 
@@ -248,7 +248,7 @@ satisfactory replacement for a spec.
 
 Because there is no unambiguous spec, implementations have diverged
 considerably.  As a result, users are often surprised to find that
-a document that renders one way on one system (say, a github wiki)
+a document that renders one way on one system (say, a GitHub wiki)
 renders differently on another (say, converting to docbook using
 pandoc).  To make matters worse, because nothing in Markdown counts
 as a "syntax error," the divergence often isn't discovered right away.
@@ -328,8 +328,10 @@ that is not a [whitespace character].
 
 An [ASCII punctuation character](@)
 is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
-`*`, `+`, `,`, `-`, `.`, `/`, `:`, `;`, `<`, `=`, `>`, `?`, `@`,
-`[`, `\`, `]`, `^`, `_`, `` ` ``, `{`, `|`, `}`, or `~`.
+`*`, `+`, `,`, `-`, `.`, `/` (U+0021–2F), 
+`:`, `;`, `<`, `=`, `>`, `?`, `@` (U+003A–0040),
+`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), 
+`{`, `|`, `}`, or `~` (U+007B–007E).
 
 A [punctuation character](@) is an [ASCII
 punctuation character] or anything in
@@ -514,8 +516,8 @@ one block element does not affect the inline parsing of any other.
 ## Container blocks and leaf blocks
 
 We can divide blocks into two types:
-[container block](@)s,
-which can contain other blocks, and [leaf block](@)s,
+[container blocks](@),
+which can contain other blocks, and [leaf blocks](@),
 which cannot.
 
 # Leaf blocks
@@ -527,7 +529,7 @@ Markdown document.
 
 A line consisting of 0-3 spaces of indentation, followed by a sequence
 of three or more matching `-`, `_`, or `*` characters, each followed
-optionally by any number of spaces, forms a
+optionally by any number of spaces or tabs, forms a
 [thematic break](@).
 
 ```````````````````````````````` example
@@ -825,7 +827,7 @@ Contents are parsed as inlines:
 ````````````````````````````````
 
 
-Leading and trailing blanks are ignored in parsing inline content:
+Leading and trailing [whitespace] is ignored in parsing inline content:
 
 ```````````````````````````````` example
 #                  foo                     
@@ -1024,6 +1026,20 @@ baz*
 baz</em></h1>
 ````````````````````````````````
 
+The contents are the result of parsing the heading's raw
+content as inlines.  The heading's raw content is formed by
+concatenating the lines and removing initial and final
+[whitespace].
+
+```````````````````````````````` example
+  Foo *bar
+baz*→
+====
+.
+<h1>Foo <em>bar
+baz</em></h1>
+````````````````````````````````
+
 
 The underlining can be any length:
 
@@ -1584,8 +1600,8 @@ begins with a code fence, indented no more than three spaces.
 
 The line with the opening code fence may optionally contain some text
 following the code fence; this is trimmed of leading and trailing
-spaces and called the [info string](@).
-The [info string] may not contain any backtick
+whitespace and called the [info string](@). If the [info string] comes
+after a backtick fence, it may not contain any backtick
 characters.  (The reason for this restriction is that otherwise
 some inline code would be incorrectly interpreted as the
 beginning of a fenced code block.)
@@ -1870,7 +1886,7 @@ Code fences (opening and closing) cannot contain internal spaces:
 ``` ```
 aaa
 .
-<p><code></code>
+<p><code> </code>
 aaa</p>
 ````````````````````````````````
 
@@ -1922,9 +1938,11 @@ bar
 
 
 An [info string] can be provided after the opening code fence.
-Opening and closing spaces will be stripped, and the first word, prefixed
-with `language-`, is used as the value for the `class` attribute of the
-`code` element within the enclosing `pre` element.
+Although this spec doesn't mandate any particular treatment of
+the info string, the first word is typically used to specify
+the language of the code block. In HTML output, the language is
+normally indicated by adding a class to the `code` element consisting
+of `language-` followed by the language name.
 
 ```````````````````````````````` example
 ```ruby
@@ -1973,6 +1991,18 @@ foo</p>
 ````````````````````````````````
 
 
+[Info strings] for tilde code blocks can contain backticks and tildes:
+
+```````````````````````````````` example
+~~~ aa ``` ~~~
+foo
+~~~
+.
+<pre><code class="language-aa">foo
+</code></pre>
+````````````````````````````````
+
+
 Closing code fences cannot have [info strings]:
 
 ```````````````````````````````` example
@@ -1991,14 +2021,15 @@ Closing code fences cannot have [info strings]:
 An [HTML block](@) is a group of lines that is treated
 as raw HTML (and will not be escaped in HTML output).
 
-There are seven kinds of [HTML block], which can be defined
-by their start and end conditions.  The block begins with a line that
-meets a [start condition](@) (after up to three spaces
-optional indentation).  It ends with the first subsequent line that
-meets a matching [end condition](@), or the last line of
-the document or other [container block]), if no line is encountered that meets the
-[end condition].  If the first line meets both the [start condition]
-and the [end condition], the block will contain just that line.
+There are seven kinds of [HTML block], which can be defined by their
+start and end conditions.  The block begins with a line that meets a
+[start condition](@) (after up to three spaces of optional indentation).
+It ends with the first subsequent line that meets a matching [end
+condition](@), or the last line of the document, or the last line of
+the [container block](#container-blocks) containing the current HTML
+block, if no line is encountered that meets the [end condition].  If
+the first line meets both the [start condition] and the [end
+condition], the block will contain just that line.
 
 1.  **Start condition:**  line begins with the string `<script`,
 `<pre`, or `<style` (case-insensitive), followed by whitespace,
@@ -2029,7 +2060,7 @@ followed by one of the strings (case-insensitive) `address`,
 `footer`, `form`, `frame`, `frameset`,
 `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
 `html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
-`meta`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
+`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
 `section`, `source`, `summary`, `table`, `tbody`, `td`,
 `tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
 by [whitespace], the end of the line, the string `>`, or
@@ -2037,16 +2068,17 @@ the string `/>`.\
 **End condition:** line is followed by a [blank line].
 
 7.  **Start condition:**  line begins with a complete [open tag]
-or [closing tag] (with any [tag name] other than `script`,
-`style`, or `pre`) followed only by [whitespace]
-or the end of the line.\
+(with any [tag name] other than `script`,
+`style`, or `pre`) or a complete [closing tag],
+followed only by [whitespace] or the end of the line.\
 **End condition:** line is followed by a [blank line].
 
 HTML blocks continue until they are closed by their appropriate
-[end condition], or the last line of the document or other [container block].
-This means any HTML **within an HTML block** that might otherwise be recognised
-as a start condition will be ignored by the parser and passed through as-is,
-without changing the parser's state.
+[end condition], or the last line of the document or other [container
+block](#container-blocks).  This means any HTML **within an HTML
+block** that might otherwise be recognised as a start condition will
+be ignored by the parser and passed through as-is, without changing
+the parser's state.
 
 For instance, `<pre>` within a HTML block started by `<table>` will not affect
 the parser state; as the HTML block was started in by start condition 6, it
@@ -2069,7 +2101,7 @@ _world_.
 </td></tr></table>
 ````````````````````````````````
 
-In this case, the HTML block is terminated by the newline — the `**hello**`
+In this case, the HTML block is terminated by the newline — the `**Hello**`
 text remains verbatim — and regular parsing resumes, with a paragraph,
 emphasised `world` and inline and block HTML following.
 
@@ -2612,7 +2644,8 @@ bar
 
 
 However, a following blank line is needed, except at the end of
-a document, and except for blocks of types 1--5, above:
+a document, and except for blocks of types 1--5, [above][HTML
+block]:
 
 ```````````````````````````````` example
 <div>
@@ -2758,8 +2791,8 @@ an indented code block:
 
 Fortunately, blank lines are usually not necessary and can be
 deleted.  The exception is inside `<pre>` tags, but as described
-above, raw HTML blocks starting with `<pre>` *can* contain blank
-lines.
+[above][HTML blocks], raw HTML blocks starting with `<pre>`
+*can* contain blank lines.
 
 ## Link reference definitions
 
@@ -2811,7 +2844,7 @@ them.
 
 ```````````````````````````````` example
 [Foo bar]:
-<my%20url>
+<my url>
 'title'
 
 [Foo bar]
@@ -2877,6 +2910,29 @@ The link destination may not be omitted:
 <p>[foo]</p>
 ````````````````````````````````
 
+However, an empty link destination may be specified using
+angle brackets:
+
+```````````````````````````````` example
+[foo]: <>
+
+[foo]
+.
+<p><a href="">foo</a></p>
+````````````````````````````````
+
+The title must be separated from the link destination by
+whitespace:
+
+```````````````````````````````` example
+[foo]: <bar>(baz)
+
+[foo]
+.
+<p>[foo]: <bar>(baz)</p>
+<p>[foo]</p>
+````````````````````````````````
+
 
 Both title and destination can contain backslash escapes
 and literal backslashes:
@@ -3034,6 +3090,25 @@ and thematic breaks, and it need not be followed by a blank line.
 </blockquote>
 ````````````````````````````````
 
+```````````````````````````````` example
+[foo]: /url
+bar
+===
+[foo]
+.
+<h1>bar</h1>
+<p><a href="/url">foo</a></p>
+````````````````````````````````
+
+```````````````````````````````` example
+[foo]: /url
+===
+[foo]
+.
+<p>===
+<a href="/url">foo</a></p>
+````````````````````````````````
+
 
 Several [link reference definitions]
 can occur one after another, without intervening blank lines.
@@ -3070,6 +3145,17 @@ are defined:
 ````````````````````````````````
 
 
+Whether something is a [link reference definition] is
+independent of whether the link reference it defines is
+used in the document.  Thus, for example, the following
+document contains just a link reference definition, and
+no visible content:
+
+```````````````````````````````` example
+[foo]: /url
+.
+````````````````````````````````
+
 
 ## Paragraphs
 
@@ -3207,7 +3293,7 @@ aaa
 
 # Container blocks
 
-A [container block] is a block that has other
+A [container block](#container-blocks) is a block that has other
 blocks as its contents.  There are two basic kinds of container blocks:
 [block quotes] and [list items].
 [Lists] are meta-containers for [list items].
@@ -3669,9 +3755,8 @@ in some browsers.)
 The following rules define [list items]:
 
 1.  **Basic case.**  If a sequence of lines *Ls* constitute a sequence of
-    blocks *Bs* starting with a [non-whitespace character] and not separated
-    from each other by more than one blank line, and *M* is a list
-    marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result
+    blocks *Bs* starting with a [non-whitespace character], and *M* is a
+    list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result
     of prepending *M* and the following spaces to the first line of
     *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
     list item with *Bs* as its contents.  The type of the list item
@@ -3981,8 +4066,7 @@ A start number may not be negative:
 
 2.  **Item starting with indented code.**  If a sequence of lines *Ls*
     constitute a sequence of blocks *Bs* starting with an indented code
-    block and not separated from each other by more than one blank line,
-    and *M* is a list marker of width *W* followed by
+    block, and *M* is a list marker of width *W* followed by
     one space, then the result of prepending *M* and the following
     space to the first line of *Ls*, and indenting subsequent lines of
     *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
@@ -4458,9 +4542,10 @@ continued here.</p>
 6.  **That's all.** Nothing that is not counted as a list item by rules
     #1--5 counts as a [list item](#list-items).
 
-The rules for sublists follow from the general rules above.  A sublist
-must be indented the same number of spaces a paragraph would need to be
-in order to be included in the list item.
+The rules for sublists follow from the general rules
+[above][List items].  A sublist must be indented the same number
+of spaces a paragraph would need to be in order to be included
+in the list item.
 
 So, in this case we need two spaces indent:
 
@@ -5049,11 +5134,9 @@ item:
  - b
   - c
    - d
-    - e
-   - f
-  - g
- - h
-- i
+  - e
+ - f
+- g
 .
 <ul>
 <li>a</li>
@@ -5063,8 +5146,6 @@ item:
 <li>e</li>
 <li>f</li>
 <li>g</li>
-<li>h</li>
-<li>i</li>
 </ul>
 ````````````````````````````````
 
@@ -5074,7 +5155,7 @@ item:
 
   2. b
 
-    3. c
+   3. c
 .
 <ol>
 <li>
@@ -5089,6 +5170,49 @@ item:
 </ol>
 ````````````````````````````````
 
+Note, however, that list items may not be indented more than
+three spaces.  Here `- e` is treated as a paragraph continuation
+line, because it is indented more than three spaces:
+
+```````````````````````````````` example
+- a
+ - b
+  - c
+   - d
+    - e
+.
+<ul>
+<li>a</li>
+<li>b</li>
+<li>c</li>
+<li>d
+- e</li>
+</ul>
+````````````````````````````````
+
+And here, `3. c` is treated as an indented code block,
+because it is indented four spaces and preceded by a
+blank line.
+
+```````````````````````````````` example
+1. a
+
+  2. b
+
+    3. c
+.
+<ol>
+<li>
+<p>a</p>
+</li>
+<li>
+<p>b</p>
+</li>
+</ol>
+<pre><code>3. c
+</code></pre>
+````````````````````````````````
+
 
 This is a loose list, because there is a blank line between
 two of the list items:
@@ -5378,10 +5502,10 @@ Thus, for example, in
 <p><code>hi</code>lo`</p>
 ````````````````````````````````
 
-
 `hi` is parsed as code, leaving the backtick at the end as a literal
 backtick.
 
+
 ## Backslash escapes
 
 Any ASCII punctuation character may be backslash-escaped:
@@ -5415,6 +5539,7 @@ not have their usual Markdown meanings:
 \* not a list
 \# not a heading
 \[foo]: /url "not a reference"
+\&ouml; not a character entity
 .
 <p>*not emphasized*
 &lt;br/&gt; not a tag
@@ -5423,7 +5548,8 @@ not have their usual Markdown meanings:
 1. not a list
 * not a list
 # not a heading
-[foo]: /url &quot;not a reference&quot;</p>
+[foo]: /url &quot;not a reference&quot;
+&amp;ouml; not a character entity</p>
 ````````````````````````````````
 
 
@@ -5521,13 +5647,23 @@ foo
 
 ## Entity and numeric character references
 
-All valid HTML entity references and numeric character
-references, except those occuring in code blocks and code spans,
-are recognized as such and treated as equivalent to the
-corresponding Unicode characters.  Conforming CommonMark parsers
-need not store information about whether a particular character
-was represented in the source using a Unicode character or
-an entity reference.
+Valid HTML entity references and numeric character references
+can be used in place of the corresponding Unicode character,
+with the following exceptions:
+
+- Entity and character references are not recognized in code
+  blocks and code spans.
+
+- Entity and character references cannot stand in place of
+  special characters that define structural elements in
+  CommonMark.  For example, although `&#42;` can be used
+  in place of a literal `*` character, `&#42;` cannot replace
+  `*` in emphasis delimiters, bullet list markers, or thematic
+  breaks.
+
+Conforming CommonMark parsers need not store information about
+whether a particular character was represented in the source
+using a Unicode character or an entity reference.
 
 [Entity references](@) consist of `&` + any of the valid
 HTML5 entity names + `;`. The
@@ -5548,22 +5684,22 @@ references and their corresponding code points.
 
 [Decimal numeric character
 references](@)
-consist of `&#` + a string of 1--8 arabic digits + `;`. A
+consist of `&#` + a string of 1--7 arabic digits + `;`. A
 numeric character reference is parsed as the corresponding
 Unicode character. Invalid Unicode code points will be replaced by
 the REPLACEMENT CHARACTER (`U+FFFD`).  For security reasons,
 the code point `U+0000` will also be replaced by `U+FFFD`.
 
 ```````````````````````````````` example
-&#35; &#1234; &#992; &#98765432; &#0;
+&#35; &#1234; &#992; &#0;
 .
-<p># Ӓ Ϡ � �</p>
+<p># Ӓ Ϡ �</p>
 ````````````````````````````````
 
 
 [Hexadecimal numeric character
 references](@) consist of `&#` +
-either `X` or `x` + a string of 1-8 hexadecimal digits + `;`.
+either `X` or `x` + a string of 1--6 hexadecimal digits + `;`.
 They too are parsed as the corresponding Unicode character (this
 time specified with a hexadecimal numeral instead of decimal).
 
@@ -5578,9 +5714,13 @@ Here are some nonentities:
 
 ```````````````````````````````` example
 &nbsp &x; &#; &#x;
+&#987654321;
+&#abcdef0;
 &ThisIsNotDefined; &hi?;
 .
 <p>&amp;nbsp &amp;x; &amp;#; &amp;#x;
+&amp;#987654321;
+&amp;#abcdef0;
 &amp;ThisIsNotDefined; &amp;hi?;</p>
 ````````````````````````````````
 
@@ -5661,6 +5801,51 @@ text in code spans and code blocks:
 ````````````````````````````````
 
 
+Entity and numeric character references cannot be used
+in place of symbols indicating structure in CommonMark
+documents.
+
+```````````````````````````````` example
+&#42;foo&#42;
+*foo*
+.
+<p>*foo*
+<em>foo</em></p>
+````````````````````````````````
+
+```````````````````````````````` example
+&#42; foo
+
+* foo
+.
+<p>* foo</p>
+<ul>
+<li>foo</li>
+</ul>
+````````````````````````````````
+
+```````````````````````````````` example
+foo&#10;&#10;bar
+.
+<p>foo
+
+bar</p>
+````````````````````````````````
+
+```````````````````````````````` example
+&#9;foo
+.
+<p>→foo</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[a](url &quot;tit&quot;)
+.
+<p>[a](url &quot;tit&quot;)</p>
+````````````````````````````````
+
+
 ## Code spans
 
 A [backtick string](@)
@@ -5669,9 +5854,16 @@ preceded nor followed by a backtick.
 
 A [code span](@) begins with a backtick string and ends with
 a backtick string of equal length.  The contents of the code span are
-the characters between the two backtick strings, with leading and
-trailing spaces and [line endings] removed, and
-[whitespace] collapsed to single spaces.
+the characters between the two backtick strings, normalized in the
+following ways:
+
+- First, [line endings] are converted to [spaces].
+- If the resulting string both begins *and* ends with a [space]
+  character, but does not consist entirely of [space]
+  characters, a single [space] character is removed from the
+  front and back.  This allows you to include code that begins
+  or ends with backtick characters, which must be separated by
+  whitespace from the opening or closing backtick strings.
 
 This is a simple code span:
 
@@ -5683,10 +5875,11 @@ This is a simple code span:
 
 
 Here two backticks are used, because the code contains a backtick.
-This example also illustrates stripping of leading and trailing spaces:
+This example also illustrates stripping of a single leading and
+trailing space:
 
 ```````````````````````````````` example
-`` foo ` bar  ``
+`` foo ` bar ``
 .
 <p><code>foo ` bar</code></p>
 ````````````````````````````````
@@ -5701,58 +5894,79 @@ spaces:
 <p><code>``</code></p>
 ````````````````````````````````
 
+Note that only *one* space is stripped:
 
-[Line endings] are treated like spaces:
+```````````````````````````````` example
+`  ``  `
+.
+<p><code> `` </code></p>
+````````````````````````````````
+
+The stripping only happens if the space is on both
+sides of the string:
 
 ```````````````````````````````` example
-``
-foo
-``
+` a`
 .
-<p><code>foo</code></p>
+<p><code> a</code></p>
 ````````````````````````````````
 
+Only [spaces], and not [unicode whitespace] in general, are
+stripped in this way:
+
+```````````````````````````````` example
+` b `
+.
+<p><code> b </code></p>
+````````````````````````````````
 
-Interior spaces and [line endings] are collapsed into
-single spaces, just as they would be by a browser:
+No stripping occurs if the code span contains only spaces:
 
 ```````````````````````````````` example
-`foo   bar
-  baz`
+` `
+`  `
 .
-<p><code>foo bar baz</code></p>
+<p><code> </code>
+<code>  </code></p>
 ````````````````````````````````
 
 
-Not all [Unicode whitespace] (for instance, non-breaking space) is
-collapsed, however:
+[Line endings] are treated like spaces:
 
 ```````````````````````````````` example
-`a  b`
+``
+foo
+bar  
+baz
+``
 .
-<p><code>a  b</code></p>
+<p><code>foo bar   baz</code></p>
 ````````````````````````````````
 
+```````````````````````````````` example
+``
+foo 
+``
+.
+<p><code>foo </code></p>
+````````````````````````````````
 
-Q: Why not just leave the spaces, since browsers will collapse them
-anyway?  A:  Because we might be targeting a non-HTML format, and we
-shouldn't rely on HTML-specific rendering assumptions.
 
-(Existing implementations differ in their treatment of internal
-spaces and [line endings].  Some, including `Markdown.pl` and
-`showdown`, convert an internal [line ending] into a
-`<br />` tag.  But this makes things difficult for those who like to
-hard-wrap their paragraphs, since a line break in the midst of a code
-span will cause an unintended line break in the output.  Others just
-leave internal spaces as they are, which is fine if only HTML is being
-targeted.)
+Interior spaces are not collapsed:
 
 ```````````````````````````````` example
-`foo `` bar`
+`foo   bar 
+baz`
 .
-<p><code>foo `` bar</code></p>
+<p><code>foo   bar  baz</code></p>
 ````````````````````````````````
 
+Note that browsers will typically collapse consecutive spaces
+when rendering `<code>` elements, so it is recommended that
+the following CSS be used:
+
+    code{white-space: pre-wrap;}
+
 
 Note that backslash escapes do not work in code spans. All backslashes
 are treated literally:
@@ -5768,6 +5982,19 @@ Backslash escapes are never needed, because one can always choose a
 string of *n* backtick characters as delimiters, where the code does
 not contain any strings of exactly *n* backtick characters.
 
+```````````````````````````````` example
+``foo`bar``
+.
+<p><code>foo`bar</code></p>
+````````````````````````````````
+
+```````````````````````````````` example
+` foo `` bar `
+.
+<p><code>foo `` bar</code></p>
+````````````````````````````````
+
+
 Code span backticks have higher precedence than any other inline
 constructs except HTML tags and autolinks.  Thus, for example, this is
 not parsed as emphasized text, since the second `*` is part of a code
@@ -5905,15 +6132,17 @@ of one or more `_` characters that is not preceded or followed by
 a non-backslash-escaped `_` character.
 
 A [left-flanking delimiter run](@) is
-a [delimiter run] that is (a) not followed by [Unicode whitespace],
-and (b) not followed by a [punctuation character], or
+a [delimiter run] that is (1) not followed by [Unicode whitespace],
+and either (2a) not followed by a [punctuation character], or
+(2b) followed by a [punctuation character] and
 preceded by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.
 
 A [right-flanking delimiter run](@) is
-a [delimiter run] that is (a) not preceded by [Unicode whitespace],
-and (b) not preceded by a [punctuation character], or
+a [delimiter run] that is (1) not preceded by [Unicode whitespace],
+and either (2a) not preceded by a [punctuation character], or
+(2b) preceded by a [punctuation character] and
 followed by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.
@@ -6005,7 +6234,8 @@ The following rules define emphasis and strong emphasis:
     [delimiter runs].  If one of the delimiters can both
     open and close emphasis, then the sum of the lengths of the
     delimiter runs containing the opening and closing delimiters
-    must not be a multiple of 3.
+    must not be a multiple of 3 unless both lengths are
+    multiples of 3.
 
 10. Strong emphasis begins with a delimiter that
     [can open strong emphasis] and ends with a delimiter that
@@ -6015,7 +6245,8 @@ The following rules define emphasis and strong emphasis:
     [delimiter runs].  If one of the delimiters can both open
     and close strong emphasis, then the sum of the lengths of
     the delimiter runs containing the opening and closing
-    delimiters must not be a multiple of 3.
+    delimiters must not be a multiple of 3 unless both lengths
+    are multiples of 3.
 
 11. A literal `*` character cannot occur at the beginning or end of
     `*`-delimited emphasis or `**`-delimited strong emphasis, unless it
@@ -6634,7 +6865,19 @@ is precluded by the condition that a delimiter that
 can both open and close (like the `*` after `foo`)
 cannot form emphasis if the sum of the lengths of
 the delimiter runs containing the opening and
-closing delimiters is a multiple of 3.
+closing delimiters is a multiple of 3 unless
+both lengths are multiples of 3.
+
+
+For the same reason, we don't get two consecutive
+emphasis sections in this example:
+
+```````````````````````````````` example
+*foo**bar*
+.
+<p><em>foo**bar</em></p>
+````````````````````````````````
+
 
 The same condition ensures that the following
 cases are all strong emphasis nested inside
@@ -6663,6 +6906,23 @@ omitted:
 ````````````````````````````````
 
 
+When the lengths of the interior closing and opening
+delimiter runs are *both* multiples of 3, though,
+they can match to create emphasis:
+
+```````````````````````````````` example
+foo***bar***baz
+.
+<p>foo<em><strong>bar</strong></em>baz</p>
+````````````````````````````````
+
+```````````````````````````````` example
+foo******bar*********baz
+.
+<p>foo<strong><strong><strong>bar</strong></strong></strong>***baz</p>
+````````````````````````````````
+
+
 Indefinite levels of nesting are possible:
 
 ```````````````````````````````` example
@@ -7198,15 +7458,16 @@ following rules apply:
 A [link destination](@) consists of either
 
 - a sequence of zero or more characters between an opening `<` and a
-  closing `>` that contains no spaces, line breaks, or unescaped
+  closing `>` that contains no line breaks or unescaped
   `<` or `>` characters, or
 
-- a nonempty sequence of characters that does not include
-  ASCII space or control characters, and includes parentheses
-  only if (a) they are backslash-escaped or (b) they are part of
-  a balanced pair of unescaped parentheses.  (Implementations
-  may impose limits on parentheses nesting to avoid performance
-  issues, but at least three levels of nesting should be supported.)
+- a nonempty sequence of characters that does not start with
+  `<`, does not include ASCII space or control characters, and
+  includes parentheses only if (a) they are backslash-escaped or
+  (b) they are part of a balanced pair of unescaped parentheses.
+  (Implementations may impose limits on parentheses nesting to
+  avoid performance issues, but at least three levels of nesting
+  should be supported.)
 
 A [link title](@)  consists of either
 
@@ -7219,7 +7480,8 @@ A [link title](@)  consists of either
   backslash-escaped, or
 
 - a sequence of zero or more characters between matching parentheses
-  (`(...)`), including a `)` character only if it is backslash-escaped.
+  (`(...)`), including a `(` or `)` character only if it is
+  backslash-escaped.
 
 Although [link titles] may span multiple lines, they may not contain
 a [blank line].
@@ -7269,9 +7531,8 @@ Both the title and the destination may be omitted:
 <p><a href="">link</a></p>
 ````````````````````````````````
 
-
-The destination cannot contain spaces or line breaks,
-even if enclosed in pointy brackets:
+The destination can only contain spaces if it is
+enclosed in pointy brackets:
 
 ```````````````````````````````` example
 [link](/my uri)
@@ -7279,13 +7540,14 @@ even if enclosed in pointy brackets:
 <p>[link](/my uri)</p>
 ````````````````````````````````
 
-
 ```````````````````````````````` example
 [link](</my uri>)
 .
-<p>[link](&lt;/my uri&gt;)</p>
+<p><a href="/my%20uri">link</a></p>
 ````````````````````````````````
 
+The destination cannot contain line breaks,
+even if enclosed in pointy brackets:
 
 ```````````````````````````````` example
 [link](foo
@@ -7295,7 +7557,6 @@ bar)
 bar)</p>
 ````````````````````````````````
 
-
 ```````````````````````````````` example
 [link](<foo
 bar>)
@@ -7304,6 +7565,36 @@ bar>)
 bar>)</p>
 ````````````````````````````````
 
+The destination can contain `)` if it is enclosed
+in pointy brackets:
+
+```````````````````````````````` example
+[a](<b)c>)
+.
+<p><a href="b)c">a</a></p>
+````````````````````````````````
+
+Pointy brackets that enclose link destinations must be unescaped:
+
+```````````````````````````````` example
+[link](<foo\>)
+.
+<p>[link](&lt;foo&gt;)</p>
+````````````````````````````````
+
+These are not links, because the opening pointy bracket
+is not matched properly:
+
+```````````````````````````````` example
+[a](<b)c
+[a](<b)c>
+[a](<b>c)
+.
+<p>[a](&lt;b)c
+[a](&lt;b)c&gt;
+[a](<b>c)</p>
+````````````````````````````````
+
 Parentheses inside the link destination may be escaped:
 
 ```````````````````````````````` example
@@ -8411,7 +8702,7 @@ If you want a link after a literal `!`, backslash-escape the
 as the link label.
 
 A [URI autolink](@) consists of `<`, followed by an
-[absolute URI] not containing `<`, followed by `>`.  It is parsed as
+[absolute URI] followed by `>`.  It is parsed as
 a link to the URI, with the URI as the link's label.
 
 An [absolute URI](@),
@@ -8624,7 +8915,7 @@ a [single-quoted attribute value], or a [double-quoted attribute value].
 
 An [unquoted attribute value](@)
 is a nonempty string of characters not
-including spaces, `"`, `'`, `=`, `<`, `>`, or `` ` ``.
+including [whitespace], `"`, `'`, `=`, `<`, `>`, or `` ` ``.
 
 A [single-quoted attribute value](@)
 consists of `'`, zero or more
@@ -8745,9 +9036,13 @@ Illegal [whitespace]:
 ```````````````````````````````` example
 < a><
 foo><bar/ >
+<foo bar=baz
+bim!bop />
 .
 <p>&lt; a&gt;&lt;
-foo&gt;&lt;bar/ &gt;</p>
+foo&gt;&lt;bar/ &gt;
+&lt;foo bar=baz
+bim!bop /&gt;</p>
 ````````````````````````````````
 
 
@@ -8944,10 +9239,10 @@ bar</em></p>
 Line breaks do not occur inside code spans
 
 ```````````````````````````````` example
-`code  
+`code 
 span`
 .
-<p><code>code span</code></p>
+<p><code>code  span</code></p>
 ````````````````````````````````
 
 
@@ -9365,7 +9660,8 @@ just above `stack_bottom` (or the first element if `stack_bottom`
 is NULL).
 
 We keep track of the `openers_bottom` for each delimiter
-type (`*`, `_`).  Initialize this to `stack_bottom`.
+type (`*`, `_`) and each length of the closing delimiter run
+(modulo 3).  Initialize this to `stack_bottom`.
 
 Then we repeat the following until we run out of potential
 closers:
@@ -9397,7 +9693,7 @@ closers:
     of the delimiter stack.  If the closing node is removed, reset
     `current_position` to the next element in the stack.
 
-- If none in found:
+- If none is found:
 
   + Set `openers_bottom` to the element before `current_position`.
     (We know that there are no openers for this kind of closer up to and