Skip to content

Commit

Permalink
Merge pull request markedjs#1034 from Feder1co5oave/new_autolink
Browse files Browse the repository at this point in the history
[commonmark]+[gfm] make autolinks compliant
  • Loading branch information
joshbruce authored Jan 24, 2018
2 parents 425ce01 + 1485c1e commit 70a805f
Show file tree
Hide file tree
Showing 7 changed files with 387 additions and 20 deletions.
45 changes: 31 additions & 14 deletions lib/marked.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ block.list = replace(block.list)
block._tag = '(?!(?:'
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code'
+ '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo'
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b';
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b';

block.html = replace(block.html)
('comment', /<!--[\s\S]*?-->/)
('closed', /<(tag)[\s\S]+?<\/\1>/)
('closing', /<tag(?:"[^"]*"|'[^']*'|[^'">])*?>/)
('closing', /<tag(?:"[^"]*"|'[^']*'|\s[^'"\/>]*)*?\/?>/)
(/tag/g, block._tag)
();

Expand Down Expand Up @@ -460,9 +460,9 @@ Lexer.prototype.token = function(src, top) {

var inline = {
escape: /^\\([\\`*{}\[\]()#+\-.!_>])/,
autolink: /^<([^ <>]+(@|:\/)[^ <>]+)>/,
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
url: noop,
tag: /^<!--[\s\S]*?-->|^<\/?\w+(?:"[^"]*"|'[^']*'|[^<'">])*?>/,
tag: /^<!--[\s\S]*?-->|^<\/?[a-zA-Z0-9\-]+(?:"[^"]*"|'[^']*'|\s[^<'">\/]*)*?\/?>/,
link: /^!?\[(inside)\]\(href\)/,
reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/,
nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/,
Expand All @@ -474,6 +474,14 @@ var inline = {
text: /^[\s\S]+?(?=[\\<!\[_*`]| {2,}\n|$)/
};

// URI scheme used by autolinks: one ASCII letter followed by 1-31 letters,
// digits, `+`, `.` or `-` (2-32 characters in total).
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
// Email address pattern. The `@` is captured in its own group so that code
// using a regex built from this one can tell an email match from a URI match
// by checking that capture. The domain needs at least two dot-separated
// labels, each starting and ending with an alphanumeric and at most 63
// characters long; the trailing lookahead rejects a match that is immediately
// followed by `-` or `_`.
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

// Expand the `scheme` and `email` placeholders in the autolink template.
// NOTE(review): `replace` is defined outside this view; from its call shape it
// appears to substitute each named placeholder and return the finished RegExp
// on the final empty call - confirm against its definition.
inline.autolink = replace(inline.autolink)
('scheme', inline._scheme)
('email', inline._email)
()

inline._inside = /(?:\[[^\]]*\]|\\[\[\]]|[^\[\]]|\](?=[^\[]*\]))*/;
inline._href = /\s*<?([\s\S]*?)>?(?:\s+['"]([\s\S]*?)['"])?\s*/;

Expand Down Expand Up @@ -507,11 +515,14 @@ inline.pedantic = merge({}, inline.normal, {

inline.gfm = merge({}, inline.normal, {
escape: replace(inline.escape)('])', '~|])')(),
url: /^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])/,
url: replace(/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/)
('email', inline._email)
(),
_backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
del: /^~~(?=\S)([\s\S]*?\S)~~/,
text: replace(inline.text)
(']|', '~]|')
('|', '|https?://|')
('|', '|https?://|ftp://|www\\.|[a-zA-Z0-9.!#$%&\'*+/=?^_`{\\|}~-]+@|')
()
});

Expand Down Expand Up @@ -589,12 +600,8 @@ InlineLexer.prototype.output = function(src) {
if (cap = this.rules.autolink.exec(src)) {
src = src.substring(cap[0].length);
if (cap[2] === '@') {
text = escape(
cap[1].charAt(6) === ':'
? this.mangle(cap[1].substring(7))
: this.mangle(cap[1])
);
href = this.mangle('mailto:') + text;
text = escape(this.mangle(cap[1]));
href = 'mailto:' + text;
} else {
text = escape(cap[1]);
href = text;
Expand All @@ -605,9 +612,19 @@ InlineLexer.prototype.output = function(src) {

// url (gfm)
if (!this.inLink && (cap = this.rules.url.exec(src))) {
cap[0] = this.rules._backpedal.exec(cap[0])[0];
src = src.substring(cap[0].length);
text = escape(cap[1]);
href = text;
if (cap[2] === '@') {
text = escape(cap[0]);
href = 'mailto:' + text;
} else {
text = escape(cap[0]);
if (cap[1] === 'www.') {
href = 'http://' + text;
} else {
href = text;
}
}
out += this.renderer.link(href, null, text);
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ function fix() {
fs.readdirSync(path.resolve(__dirname, 'original')).forEach(function(file) {
var text = fs.readFileSync(path.resolve(__dirname, 'original', file));

if (file === 'hard_wrapped_paragraphs_with_list_like_lines.md') {
if (path.extname(file) === '.md') {
text = '---\ngfm: false\n---\n' + text;
}

Expand Down
91 changes: 91 additions & 0 deletions test/new/cm_autolinks.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<p>Here are some valid autolinks:</p>

<h3 id="example-565">Example 565</h3>

<p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>

<h3 id="example-566">Example 566</h3>

<p><a href="http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>

<h3 id="example-567">Example 567</h3>

<p><a href="irc://foo.bar:2233/baz">irc://foo.bar:2233/baz</a></p>

<h3 id="example-568">Example 568</h3>

<p>Uppercase is also fine:</p>

<p><a href="MAILTO:FOO@BAR.BAZ">MAILTO:FOO@BAR.BAZ</a></p>

<p>Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax:</p>

<h3 id="example-569">Example 569</h3>

<p><a href="a+b+c:d">a+b+c:d</a></p>

<h3 id="example-570">Example 570</h3>

<p><a href="made-up-scheme://foo,bar">made-up-scheme://foo,bar</a></p>

<h3 id="example-571">Example 571</h3>

<p><a href="http://../">http://../</a></p>

<h3 id="example-572">Example 572</h3>

<p><a href="localhost:5001/foo">localhost:5001/foo</a></p>

<h3 id="example-573">Example 573</h3>

<p>Spaces are not allowed in autolinks:</p>

<p>&lt;http://foo.bar/baz bim&gt;</p>

<h3 id="example-574">Example 574</h3>

<p>Backslash-escapes do not work inside autolinks:</p>

<p><a href="http://example.com/%5C%5B%5C">http://example.com/\[\</a></p>

<p>Examples of email autolinks:</p>

<h3 id="example-575">Example 575</h3>

<p><a href="mailto:foo@bar.example.com">foo@bar.example.com</a></p>

<h3 id="example-576">Example 576</h3>

<p><a href="mailto:foo+special@Bar.baz-bar0.com">foo+special@Bar.baz-bar0.com</a></p>

<h3 id="example-577">Example 577</h3>

<p>Backslash-escapes do not work inside email autolinks:</p>

<p>&lt;foo+@bar.example.com&gt;</p>

<p>These are not autolinks:</p>

<h3 id="example-578">Example 578</h3>

<p>&lt;&gt;</p>

<h3 id="example-579">Example 579</h3>

<p>&lt; http://foo.bar &gt;</p>

<h3 id="example-580">Example 580</h3>

<p>&lt;m:abc&gt;</p>

<h3 id="example-581">Example 581</h3>

<p>&lt;foo.bar.baz&gt;</p>

<h3 id="example-582">Example 582</h3>

<p>http://example.com</p>

<h3 id="example-583">Example 583</h3>

<p>foo@bar.example.com</p>
96 changes: 96 additions & 0 deletions test/new/cm_autolinks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
---
gfm: false
mangle: false
---

Here are some valid autolinks:

### Example 565

<http://foo.bar.baz>

### Example 566

<http://foo.bar.baz/test?q=hello&id=22&boolean>

### Example 567

<irc://foo.bar:2233/baz>

### Example 568

Uppercase is also fine:

<MAILTO:FOO@BAR.BAZ>

Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax:

### Example 569

<a+b+c:d>

### Example 570

<made-up-scheme://foo,bar>

### Example 571

<http://../>

### Example 572

<localhost:5001/foo>

### Example 573

Spaces are not allowed in autolinks:

<http://foo.bar/baz bim>

### Example 574

Backslash-escapes do not work inside autolinks:

<http://example.com/\[\>

Examples of email autolinks:

### Example 575

<foo@bar.example.com>

### Example 576

<foo+special@Bar.baz-bar0.com>

### Example 577

Backslash-escapes do not work inside email autolinks:

<foo\+@bar.example.com>

These are not autolinks:

### Example 578

<>

### Example 579

< http://foo.bar >

### Example 580

<m:abc>

### Example 581

<foo.bar.baz>

### Example 582

http://example.com

### Example 583

foo@bar.example.com
85 changes: 83 additions & 2 deletions test/new/gfm_links.html
Original file line number Diff line number Diff line change
@@ -1,2 +1,83 @@
<p>This should be a link:
<a href="http://example.com/hello-world">http://example.com/hello-world</a>.</p>
<p>link with . <a href="http://example.com/hello-world">http://example.com/hello-world</a>.</p>

<p>link with ! <a href="http://example.com/hello-world">http://example.com/hello-world</a>!</p>

<p>link with : <a href="http://example.com/hello-world">http://example.com/hello-world</a>:</p>

<p>link with , <a href="http://example.com/hello-world">http://example.com/hello-world</a>,</p>

<p>link with ; <a href="http://example.com/hello-world">http://example.com/hello-world</a>;</p>

<p>link with ) <a href="http://example.com/hello-world">http://example.com/hello-world</a>)</p>

<p>link with nothing <a href="http://example.com/hello-world">http://example.com/hello-world</a></p>

<h3 id="example-597">Example 597</h3>

<p>The scheme http will be inserted automatically:</p>

<p><a href="http://www.commonmark.org">www.commonmark.org</a></p>
<h3 id="example-598">Example 598</h3>

<p>After a valid domain, zero or more non-space non-&lt; characters may follow:</p>

<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>

<h3 id="example-599">Example 599</h3>

<p>Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not be considered part of the autolink, though they may be included in the interior of the link:</p>

<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>

<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>

<h3 id="example-600">Example 600</h3>

<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>

<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>

<h3 id="example-601">Example 601</h3>

<p><a href="http://www.google.com/search?q=(business))+ok">www.google.com/search?q=(business))+ok</a></p>

<h3 id="example-602">Example 602</h3>

<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>

<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;</p>

<h3 id="example-603">Example 603</h3>

<p>&lt; immediately ends an autolink.</p>

<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>

<h3 id="example-604">Example 604</h3>

<p><a href="http://commonmark.org">http://commonmark.org</a></p>

<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>

<p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p>

<p>Extended email autolinks:</p>

<h3 id="example-605">Example 605</h3>

<p><a href="mailto:foo@bar.baz">foo@bar.baz</a></p>

<h3 id="example-606">Example 606</h3>

<p>hello@mail+xyz.example isn&#39;t valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>

<h3 id="example-607">Example 607</h3>

<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>

<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>

<p>a.b-c_d@a.b-</p>

<pre><code>a.b-c_d@a.b_
</code></pre>
Loading

0 comments on commit 70a805f

Please sign in to comment.