Skip to content

Commit

Permalink
Removed Lookbehinds
Browse files Browse the repository at this point in the history
The beginning and end delimiters for EM and Strong must now be searched for with separate regexes in order to work without lookbehinds. This invalidates the regex that skips over blocks (code, HTML, etc.) that take precedence over EM or Strong blocks.

Getting around this means we must now mask not only reflinks, but all enclosed blocks that were previously just skipped over in the regex.

Add one check for an overlapping Strong block when testing EM; the parser now passes CommonMark examples 390 and 471.
  • Loading branch information
calculuschild committed Jul 8, 2020
1 parent bd4f8c4 commit 211b9f9
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 20 deletions.
8 changes: 7 additions & 1 deletion src/Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -324,17 +324,23 @@ module.exports = class Lexer {

// String with links and other enclosed blocks masked to avoid interference with em and strong
let maskedSrc = src;
let match;

// Mask out reflinks
if (this.tokens.links) {
const links = Object.keys(this.tokens.links);
if (links.length > 0) {
let match;
while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
}
}
}
}
// Mask out other blocks
while ((match = this.tokenizer.rules.inline.emSkip.exec(maskedSrc)) != null) {
maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.emSkip.lastIndex);
}

while (src) {
// escape
Expand Down
40 changes: 34 additions & 6 deletions src/Tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -490,11 +490,25 @@ module.exports = class Tokenizer {
}

strong(src, maskedSrc, prevChar = '') {
let cap = this.rules.inline.preStrong.exec(src);
let match = this.rules.inline.strStart.exec(src);

if (cap) {
if (match) {
maskedSrc = maskedSrc.slice(-1 * src.length);
cap = this.rules.inline.strong.exec(maskedSrc);
let strEnd;

if(match[0] == "**")
strEnd = this.rules.inline.strEndAst;
else
strEnd = this.rules.inline.strEndUnd;

strEnd.lastIndex = 0;

let cap;
while ((match = strEnd.exec(maskedSrc)) != null) {
cap = this.rules.inline.strong.exec(maskedSrc.slice(0,match.index+3));
if (cap)
break;
}

if (cap) {
if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
Expand All @@ -509,11 +523,25 @@ module.exports = class Tokenizer {
}

em(src, maskedSrc, prevChar = '') {
let cap = this.rules.inline.preEm.exec(src);
let match = this.rules.inline.emStart.exec(src);

if (cap) {
if (match) {
maskedSrc = maskedSrc.slice(-1 * src.length);
cap = this.rules.inline.em.exec(maskedSrc);
let emEnd;

if(match[0] == "*")
emEnd = this.rules.inline.emEndAst;
else
emEnd = this.rules.inline.emEndUnd;

emEnd.lastIndex = 0;

let cap;
while ((match = emEnd.exec(maskedSrc)) != null) {
cap = this.rules.inline.em.exec(maskedSrc.slice(0,match.index+2));
if (cap)
break;
}

if (cap) {
if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
Expand Down
38 changes: 33 additions & 5 deletions src/rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,15 @@ const inline = {
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
reflinkSearch: 'reflink|nolink(?!\\()',
preStrong: /^(?:\*\*|__)/,
strong: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])((?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)*?(?<!\\)\*){2})+?)(?:(?<![punctuation\s])\*\*(?!\*)|(?<=[punctuation])\*\*(?!\*)(?:(?=[punctuation\s]|$)))|^__(?![\s])((?:(?:(?!emSkip)(?:[^_]|[\\\s]_)|emSkip)|(?:(?:(?!emSkip)(?:[^_]|[\\\s]_)|emSkip)*?(?<!\\)_){2})+?)(?:(?<![\s])__(?!_)(?:(?=[punctuation\s])|$))/,
preEm: /^[*_]/,
strStart: /^\*\*|__/,
strEndAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation\s]|$))/,
strEndUnd: /[^\s]__(?!_)(?:(?=[punctuation\s])|$)/,
strong: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])((?:(?:(?!evSkip)(?:[^*]|\\\*)|evSkip)|(?:(?:(?!evSkip)(?:[^*]|\\\*)|evSkip)*?(?<!\\)\*){2})+?)\*\*$|^__(?![\s])((?:(?:(?!evSkip)(?:[^_]|\\_)|evSkip)|(?:(?:(?!evSkip)(?:[^_]|\\_)|evSkip)*?(?<!\\)_){2})+?)__$/,
emStart: /^[*_]/,
emEndAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation\s]|$))/,
emEndUnd: /[^\s]_(?!_)(?:(?=[punctuation\s])|$)/,
// (1) returns if starts w/ punctuation | (2) ⬐Check groups to skip over ⬐ skip if needed ⬐repeat logic for inner *'s (must be in pairs)⬎ ⬐last char can't be punct OR final * must also be followed by punct (or endline) | (3) Underscores ⬐Check groups to skip over ⬐skip if needed ⬐repeat logic for inner _'s (must be in pairs)⬎ ⬐last char can't be a space, and final _ must precede punct or \s (or endline)
em: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])(?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)*?(?<!\\)\*){2})*?(?:(?<![punctuation\s])\*(?!\*)|(?<=[punctuation])\*(?!\*)(?:(?=[punctuation\s]|$)))|^_(?![_\s])((?:(?:(?!emSkip)(?:[^_]|[\\\s]_)|emSkip)|(?:(?:(?!emSkip)(?:[^_]|[\\\s]_)|emSkip)*?(?<!\\)_){2})*?)(?:(?<![\s])_(?!_)(?:(?=[punctuation\s])|$))/,
em: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])(?:(?:(?!evSkip)(?:[^*]|\\\*)|evSkip)|\*(?:(?!evSkip)(?:[^*]|\\\*)|evSkip)*?\*)*?\*$|^_(?![_\s])(?:(?:(?!evSkip)(?:[^_]|\\_)|evSkip)|(?:(?:(?!evSkip)(?:[^_]|\\_)|evSkip)*?_){2})*?_$/,
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
br: /^( {2,}|\\)\n(?!\s*$)/,
del: noopTest,
Expand All @@ -188,17 +192,41 @@ inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._pu

// sequences em should skip over [title](link), `code`, <html>
inline._emSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
inline._strSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
inline._evSkip = '__[^_]*?__';

inline.em = edit(inline.em)
.replace(/punctuation/g, inline._punctuation)
.replace(/emSkip/g, inline._emSkip)
.replace(/evSkip/g, inline._evSkip)
.getRegex();

inline.emEndAst = edit(inline.emEndAst, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();

inline.emEndUnd = edit(inline.emEndUnd, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();

inline.emSkip = edit(inline._emSkip, 'g')
.getRegex();

inline.evSkip = edit(inline._evSkip, 'g')
.getRegex();

inline.strong = edit(inline.strong)
.replace(/punctuation/g, inline._punctuation)
.replace(/emSkip/g, inline._emSkip)
.getRegex();

inline.strEndAst = edit(inline.strEndAst, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();

inline.strEndUnd = edit(inline.strEndUnd, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();

inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
Expand Down
6 changes: 2 additions & 4 deletions test/specs/commonmark/commonmark.0.29.json
Original file line number Diff line number Diff line change
Expand Up @@ -3160,8 +3160,7 @@
"example": 390,
"start_line": 6672,
"end_line": 6676,
"section": "Emphasis and strong emphasis",
"shouldFail": true
"section": "Emphasis and strong emphasis"
},
{
"markdown": "**(**foo)\n",
Expand Down Expand Up @@ -3828,8 +3827,7 @@
"example": 471,
"start_line": 7355,
"end_line": 7359,
"section": "Emphasis and strong emphasis",
"shouldFail": true
"section": "Emphasis and strong emphasis"
},
{
"markdown": "*[bar*](/url)\n",
Expand Down
6 changes: 2 additions & 4 deletions test/specs/gfm/commonmark.0.29.json
Original file line number Diff line number Diff line change
Expand Up @@ -3160,8 +3160,7 @@
"example": 390,
"start_line": 6672,
"end_line": 6676,
"section": "Emphasis and strong emphasis",
"shouldFail": true
"section": "Emphasis and strong emphasis"
},
{
"markdown": "**(**foo)\n",
Expand Down Expand Up @@ -3828,8 +3827,7 @@
"example": 471,
"start_line": 7355,
"end_line": 7359,
"section": "Emphasis and strong emphasis",
"shouldFail": true
"section": "Emphasis and strong emphasis"
},
{
"markdown": "*[bar*](/url)\n",
Expand Down

0 comments on commit 211b9f9

Please sign in to comment.