Skip to content

Commit

Permalink
md_resolve_links: Avoid link ref. def. lookup if...
Browse files Browse the repository at this point in the history
if we know that the bracket pair contains nested brackets. That makes
the label invalid anyway, therefore we know that there is no link ref.
def. to be found anyway.

In case of heavily nested bracket pairs, the lookup could lead to
quadratic parsing times.

Fixes #172.
  • Loading branch information
mity committed Jan 10, 2022
1 parent 7f44e1a commit b42e7f5
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ Fixes:
One dash (optionally with a leading or trailing `:` appended or prepended)
is now sufficient. This improves compatibility with the GFM.

* [#172](https://github.com/mity/md4c/issues/172):
Fix quadratic time behavior caused by unnecessary lookup for link reference
definition even if the potential label contains nested brackets.


## Version 0.4.8

Expand Down
23 changes: 15 additions & 8 deletions src/md4c.c
Original file line number Diff line number Diff line change
Expand Up @@ -2487,6 +2487,7 @@ struct MD_MARK_tag {
#define MD_MARK_EMPH_MOD3_MASK (0x40 | 0x80)
#define MD_MARK_AUTOLINK 0x20 /* Distinguisher for '<', '>'. */
#define MD_MARK_VALIDPERMISSIVEAUTOLINK 0x20 /* For permissive autolinks. */
#define MD_MARK_HASNESTEDBRACKETS 0x20 /* For '[' to rule out invalid link labels early */

static MD_MARKCHAIN*
md_asterisk_chain(MD_CTX* ctx, unsigned flags)
Expand Down Expand Up @@ -3367,17 +3368,20 @@ md_analyze_bracket(MD_CTX* ctx, int mark_index)
* or enclosing pair of brackets (if the inner is the link, the outer
* one cannot be.)
*
* Therefore we here only construct a list of resolved '[' ']' pairs
* ordered by position of the closer. This allows ur to analyze what is
* or is not link in the right order, from inside to outside in case
* of nested brackets.
* Therefore we here only construct a list of '[' ']' pairs ordered by
* position of the closer. This allows us to analyze what is or is not
* link in the right order, from inside to outside in case of nested
* brackets.
*
* The resolving itself is deferred into md_resolve_links().
* The resolving itself is deferred to md_resolve_links().
*/

MD_MARK* mark = &ctx->marks[mark_index];

if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
if(BRACKET_OPENERS.head != -1)
ctx->marks[BRACKET_OPENERS.tail].flags |= MD_MARK_HASNESTEDBRACKETS;

md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index);
return;
}
Expand Down Expand Up @@ -3542,10 +3546,12 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
if(next_opener != NULL && next_opener->beg == closer->end) {
if(next_closer->beg > closer->end + 1) {
/* Might be full reference link. */
is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS))
is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
} else {
/* Might be shortcut reference link. */
is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
}

if(is_link < 0)
Expand Down Expand Up @@ -3602,7 +3608,8 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)

if(!is_link) {
/* Might be collapsed reference link. */
is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
if(is_link < 0)
return -1;
}
Expand Down
5 changes: 4 additions & 1 deletion test/pathological_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@
re.compile("(\[ \(\]\(){50000}")),
"broken thematic break":
(("* " * 50000 + "a"),
re.compile("<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}"))
re.compile("<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}")),
"nested invalid link references":
(("[" * 50000 + "]" * 50000 + "\n\n[a]: /b"),
re.compile("\[{50000}\]{50000}"))
}

whitespace_re = re.compile('/s+/')
Expand Down

0 comments on commit b42e7f5

Please sign in to comment.