Skip to content
This repository has been archived by the owner on Mar 26, 2022. It is now read-only.

Commit

Permalink
Synchronize code with upstream commonmark.js 0.29
Browse files Browse the repository at this point in the history
 * Updated code to match commonmark/commonmark.js@5eebfd3
 * Improved block dispatch performance by using a pre-cached dict instead of importlib
 * Added missing Unicode case folding for reference normalization
 * Fixed a bug where empty link labels were not properly recognized
  • Loading branch information
iamahuman committed May 2, 2019
1 parent 75add17 commit cee6b84
Show file tree
Hide file tree
Showing 9 changed files with 700 additions and 235 deletions.
87 changes: 48 additions & 39 deletions commonmark/blocks.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from __future__ import absolute_import, unicode_literals

import re
from importlib import import_module
from commonmark import common
from commonmark.common import unescape_string
from commonmark.inlines import InlineParser
from commonmark.node import Node
from commonmark.utils import to_camel_case


CODE_INDENT = 4
Expand All @@ -21,7 +19,7 @@
r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
r'fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|'
r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|'
r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|'
r'nav|noframes|ol|optgroup|option|p|param|section|source|title|'
r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
r'(?:\s|[/]?[>]|$)',
Expand All @@ -45,7 +43,7 @@
reBulletListMarker = re.compile(r'^[*+-]')
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}')
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')
reLineEnding = re.compile(r'\r\n|\n|\r')
Expand All @@ -57,7 +55,7 @@ def is_blank(s):


def is_space_or_tab(s):
return s == ' ' or s == '\t'
return s in (' ', '\t')


def peek(ln, pos):
Expand All @@ -73,9 +71,12 @@ def ends_with_blank_line(block):
while block:
if block.last_line_blank:
return True
if (block.t == 'list' or block.t == 'item'):
if not block.last_line_checked and \
block.t in ('list', 'item'):
block.last_line_checked = True
block = block.last_child
else:
block.last_line_checked = True
break

return False
Expand All @@ -94,6 +95,8 @@ def parse_list_marker(parser, container):
'padding': None,
'marker_offset': parser.indent,
}
if parser.indent >= 4:
return None
m = re.search(reBulletListMarker, rest)
m2 = re.search(reOrderedListMarker, rest)
if m:
Expand Down Expand Up @@ -515,15 +518,25 @@ def setext_heading(parser, container=None):
parser.current_line[parser.next_nonspace:])
if m:
parser.close_unmatched_blocks()
heading = Node('heading', container.sourcepos)
heading.level = 1 if m.group()[0] == '=' else 2
heading.string_content = container.string_content
container.insert_after(heading)
container.unlink()
parser.tip = heading
parser.advance_offset(
len(parser.current_line) - parser.offset, False)
return 2
# resolve reference link definitions
while peek(container.string_content, 0) == '[':
pos = parser.inline_parser.parseReference(
container.string_content, parser.refmap)
if not pos:
break
container.string_content = container.string_content[pos:]
if container.string_content:
heading = Node('heading', container.sourcepos)
heading.level = 1 if m.group()[0] == '=' else 2
heading.string_content = container.string_content
container.insert_after(heading)
container.unlink()
parser.tip = heading
parser.advance_offset(
len(parser.current_line) - parser.offset, False)
return 2
else:
return 0

return 0

Expand Down Expand Up @@ -610,13 +623,8 @@ def add_child(self, tag, offset):
""" Add block of type tag as a child of the tip. If the tip can't
accept children, close and finalize it and try its parent,
and so on until we find a block that can accept children."""
block_class = getattr(import_module('commonmark.blocks'),
to_camel_case(self.tip.t))
while not block_class.can_contain(tag):
while not self.blocks[self.tip.t].can_contain(tag):
self.finalize(self.tip, self.line_number - 1)
block_class = getattr(
import_module('commonmark.blocks'),
to_camel_case(self.tip.t))

column_number = offset + 1
new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
Expand Down Expand Up @@ -725,15 +733,15 @@ def incorporate_line(self, ln):
# For each containing block, try to parse the associated line start.
# Bail out on failure: container will point to the last matching block.
# Set all_matched to false if not all containers match.
last_child = container.last_child
while last_child and last_child.is_open:
while True:
last_child = container.last_child
if not (last_child and last_child.is_open):
break
container = last_child

self.find_next_nonspace()
block_class = getattr(
import_module('commonmark.blocks'),
to_camel_case(container.t))
rv = block_class.continue_(self, container)

rv = self.blocks[container.t].continue_(self, container)
if rv == 0:
# we've matched, keep going
pass
Expand All @@ -745,21 +753,19 @@ def incorporate_line(self, ln):
self.last_line_length = len(ln)
return
else:
raise ValueError('returned illegal value, must be 0, 1, or 2')
raise ValueError(
'continue_ returned illegal value, must be 0, 1, or 2')

if not all_matched:
# back up to last matching block
container = container.parent
break

last_child = container.last_child

self.all_closed = (container == self.oldtip)
self.last_matched_container = container

block_class = getattr(import_module('commonmark.blocks'),
to_camel_case(container.t))
matched_leaf = container.t != 'paragraph' and block_class.accepts_lines
matched_leaf = container.t != 'paragraph' and \
self.blocks[container.t].accepts_lines
starts = self.block_starts
starts_len = len(starts.METHODS)
# Unless last matched container is a code block, try new container
Expand Down Expand Up @@ -824,9 +830,7 @@ def incorporate_line(self, ln):
cont.last_line_blank = last_line_blank
cont = cont.parent

block_class = getattr(import_module('commonmark.blocks'),
to_camel_case(t))
if block_class.accepts_lines:
if self.blocks[t].accepts_lines:
self.add_line()
# if HtmlBlock, check for end condition
if t == 'html_block' and \
Expand All @@ -853,9 +857,8 @@ def finalize(self, block, line_number):
above = block.parent
block.is_open = False
block.sourcepos[1] = [line_number, self.last_line_length]
block_class = getattr(import_module('commonmark.blocks'),
to_camel_case(block.t))
block_class.finalize(self, block)

self.blocks[block.t].finalize(self, block)

self.tip = above

Expand Down Expand Up @@ -897,3 +900,9 @@ def parse(self, my_input):
self.finalize(self.tip, length)
self.process_inlines(self.doc)
return self.doc


CAMEL_RE = re.compile("(.)([A-Z](?:[a-z]+|(?<=[a-z0-9].)))")
Parser.blocks = dict(
(CAMEL_RE.sub(r'\1_\2', cls.__name__).lower(), cls)
for cls in Block.__subclasses__())
19 changes: 6 additions & 13 deletions commonmark/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from commonmark import entitytrans
HTMLunescape = entitytrans._unescape

ENTITY = '&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});'
ENTITY = '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});'

TAGNAME = '[A-Za-z][A-Za-z0-9-]*'
ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'
Expand All @@ -45,7 +45,6 @@
'\\\\' + ESCAPABLE + '|' + ENTITY, re.IGNORECASE)
XMLSPECIAL = '[&<>"]'
reXmlSpecial = re.compile(XMLSPECIAL)
reXmlSpecialOrEntity = re.compile(ENTITY + '|' + XMLSPECIAL, re.IGNORECASE)


def unescape_char(s):
Expand Down Expand Up @@ -102,19 +101,13 @@ def replace_unsafe_char(s):
return UNSAFE_MAP.get(s, s)


def escape_xml(s, preserve_entities):
def escape_xml(s):
if s is None:
return ''
if re.search(reXmlSpecial, s):
if preserve_entities:
return re.sub(
reXmlSpecialOrEntity,
lambda m: replace_unsafe_char(m.group()),
s)
else:
return re.sub(
reXmlSpecial,
lambda m: replace_unsafe_char(m.group()),
s)
return re.sub(
reXmlSpecial,
lambda m: replace_unsafe_char(m.group()),
s)
else:
return s
Loading

0 comments on commit cee6b84

Please sign in to comment.