diff --git a/lib/marked.js b/lib/marked.js index f1d66ed4f9..c8236e95d1 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -50,12 +50,12 @@ block.list = replace(block.list) block._tag = '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code' + '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo' - + '|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b'; + + '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b'; block.html = replace(block.html) ('comment', //) ('closed', /<(tag)[\s\S]+?<\/\1>/) - ('closing', /])*?>/) + ('closing', /]*)*?\/?>/) (/tag/g, block._tag) (); @@ -460,9 +460,9 @@ Lexer.prototype.token = function(src, top) { var inline = { escape: /^\\([\\`*{}\[\]()#+\-.!_>])/, - autolink: /^<([^ <>]+(@|:\/)[^ <>]+)>/, + autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noop, - tag: /^|^<\/?\w+(?:"[^"]*"|'[^']*'|[^<'">])*?>/, + tag: /^|^<\/?[a-zA-Z0-9\-]+(?:"[^"]*"|'[^']*'|\s[^<'">\/]*)*?\/?>/, link: /^!?\[(inside)\]\(href\)/, reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/, nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/, @@ -474,6 +474,14 @@ var inline = { text: /^[\s\S]+?(?=[\\?(?:\s+['"]([\s\S]*?)['"])?\s*/; @@ -507,11 +515,14 @@ inline.pedantic = merge({}, inline.normal, { inline.gfm = merge({}, inline.normal, { escape: replace(inline.escape)('])', '~|])')(), - url: /^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])/, + url: replace(/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/) + ('email', inline._email) + (), + _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/, del: /^~~(?=\S)([\s\S]*?\S)~~/, text: replace(inline.text) (']|', '~]|') - ('|', '|https?://|') + ('|', '|https?://|ftp://|www\\.|[a-zA-Z0-9.!#$%&\'*+/=?^_`{\\|}~-]+@|') () }); @@ -589,12 +600,8 @@ InlineLexer.prototype.output = function(src) { if (cap = this.rules.autolink.exec(src)) { src = src.substring(cap[0].length); if (cap[2] === '@') { - text = escape( - cap[1].charAt(6) === ':' - ? this.mangle(cap[1].substring(7)) - : this.mangle(cap[1]) - ); - href = this.mangle('mailto:') + text; + text = escape(this.mangle(cap[1])); + href = 'mailto:' + text; } else { text = escape(cap[1]); href = text; @@ -605,9 +612,19 @@ InlineLexer.prototype.output = function(src) { // url (gfm) if (!this.inLink && (cap = this.rules.url.exec(src))) { + cap[0] = this.rules._backpedal.exec(cap[0])[0]; src = src.substring(cap[0].length); - text = escape(cap[1]); - href = text; + if (cap[2] === '@') { + text = escape(cap[0]); + href = 'mailto:' + text; + } else { + text = escape(cap[0]); + if (cap[1] === 'www.') { + href = 'http://' + text; + } else { + href = text; + } + } out += this.renderer.link(href, null, text); continue; } diff --git a/test/index.js b/test/index.js index defd7f3b53..491ae6b9c0 100644 --- a/test/index.js +++ b/test/index.js @@ -351,7 +351,7 @@ function fix() { fs.readdirSync(path.resolve(__dirname, 'original')).forEach(function(file) { var text = fs.readFileSync(path.resolve(__dirname, 'original', file)); - if (file === 'hard_wrapped_paragraphs_with_list_like_lines.md') { + if (path.extname(file) === '.md') { text = '---\ngfm: false\n---\n' + text; } diff --git a/test/new/cm_autolinks.html b/test/new/cm_autolinks.html new file mode 100644 index 0000000000..e7ae0ee416 --- /dev/null +++ b/test/new/cm_autolinks.html @@ -0,0 +1,91 @@ +

Here are some valid autolinks:

+ +

Example 565

+ +

http://foo.bar.baz

+ +

Example 566

+ +

http://foo.bar.baz/test?q=hello&id=22&boolean

+ +

Example 567

+ +

irc://foo.bar:2233/baz

+ +

Example 568

+ +

Uppercase is also fine:

+ +

MAILTO:FOO@BAR.BAZ

+ +

Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax:

+ +

Example 569

+ +

a+b+c:d

+ +

Example 570

+ +

made-up-scheme://foo,bar

+ +

Example 571

+ +

http://../

+ +

Example 572

+ +

localhost:5001/foo

+ +

Example 573

+ +

Spaces are not allowed in autolinks:

+ +

<http://foo.bar/baz bim>

+ +

Example 574

+ +

Backslash-escapes do not work inside autolinks:

+ +

http://example.com/\[\

+ +

Examples of email autolinks:

+ +

Example 575

+ +

foo@bar.example.com

+ +

Example 576

+ +

foo+special@Bar.baz-bar0.com

+ +

Example 577

+ +

Backslash-escapes do not work inside email autolinks:

+ +

<foo+@bar.example.com>

+ +

These are not autolinks:

+ +

Example 578

+ +

<>

+ +

Example 579

+ +

< http://foo.bar >

+ +

Example 580

+ +

<m:abc>

+ +

Example 581

+ +

<foo.bar.baz>

+ +

Example 582

+ +

http://example.com

+ +

Example 583

+ +

foo@bar.example.com

\ No newline at end of file diff --git a/test/new/cm_autolinks.md b/test/new/cm_autolinks.md new file mode 100644 index 0000000000..a19d830c9a --- /dev/null +++ b/test/new/cm_autolinks.md @@ -0,0 +1,96 @@ +--- +gfm: false +mangle: false +--- + +Here are some valid autolinks: + +### Example 565 + + + +### Example 566 + + + +### Example 567 + + + +### Example 568 + +Uppercase is also fine: + + + +Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax: + +### Example 569 + + + +### Example 570 + + + +### Example 571 + + + +### Example 572 + + + +### Example 573 + +Spaces are not allowed in autolinks: + + + +### Example 574 + +Backslash-escapes do not work inside autolinks: + + + +Examples of email autolinks: + +### Example 575 + + + +### Example 576 + + + +### Example 577 + +Backslash-escapes do not work inside email autolinks: + + + +These are not autolinks: + +### Example 578 + +<> + +### Example 579 + +< http://foo.bar > + +### Example 580 + + + +### Example 581 + + + +### Example 582 + +http://example.com + +### Example 583 + +foo@bar.example.com \ No newline at end of file diff --git a/test/new/gfm_links.html b/test/new/gfm_links.html index 4f62ae1de6..9058f2f201 100644 --- a/test/new/gfm_links.html +++ b/test/new/gfm_links.html @@ -1,2 +1,83 @@ -

This should be a link: -http://example.com/hello-world.

+

link with . http://example.com/hello-world.

+ +

link with ! http://example.com/hello-world!

+ +

link with : http://example.com/hello-world:

+ +

link with , http://example.com/hello-world,

+ +

link with ; http://example.com/hello-world;

+ +

link with ) http://example.com/hello-world)

+ +

link with nothing http://example.com/hello-world

+ +

Example 597

+ +

The scheme http will be inserted automatically:

+ +

www.commonmark.org

+

Example 598

+ +

After a valid domain, zero or more non-space non-< characters may follow:

+ +

Visit www.commonmark.org/help for more information.

+ +

Example 599

+ +

Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not be considered part of the autolink, though they may be included in the interior of the link:

+ +

Visit www.commonmark.org.

+ +

Visit www.commonmark.org/a.b.

+ +

Example 600

+ +

www.google.com/search?q=Markup+(business)

+ +

(www.google.com/search?q=Markup+(business))

+ +

Example 601

+ +

www.google.com/search?q=(business))+ok

+ +

Example 602

+ +

www.google.com/search?q=commonmark&hl=en

+ +

www.google.com/search?q=commonmark&

+ +

Example 603

+ +

< immediately ends an autolink.

+ +

www.commonmark.org/he<lp

+ +

Example 604

+ +

http://commonmark.org

+ +

(Visit https://encrypted.google.com/search?q=Markup+(business))

+ +

Anonymous FTP is available at ftp://foo.bar.baz.

+ +

Extended email autolinks:

+ +

Example 605

+ +

foo@bar.baz

+ +

Example 606

+ +

hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.

+ +

Example 607

+ +

a.b-c_d@a.b

+ +

a.b-c_d@a.b.

+ +

a.b-c_d@a.b-

+ +
a.b-c_d@a.b_
+
\ No newline at end of file diff --git a/test/new/gfm_links.md b/test/new/gfm_links.md index c1336661a9..86b9a3eec2 100644 --- a/test/new/gfm_links.md +++ b/test/new/gfm_links.md @@ -1 +1,83 @@ -This should be a link: http://example.com/hello-world. +link with . http://example.com/hello-world. + +link with ! http://example.com/hello-world! + +link with : http://example.com/hello-world: + +link with , http://example.com/hello-world, + +link with ; http://example.com/hello-world; + +link with ) http://example.com/hello-world) + +link with nothing http://example.com/hello-world + +### Example 597 + +The scheme http will be inserted automatically: + +www.commonmark.org + +### Example 598 + +After a valid domain, zero or more non-space non-< characters may follow: + +Visit www.commonmark.org/help for more information. + +### Example 599 + +Trailing punctuation (specifically, ?, !, ., ,, :, \*, \_, and ~) will not be considered part of the autolink, though they may be included in the interior of the link: + +Visit www.commonmark.org. + +Visit www.commonmark.org/a.b. + +### Example 600 + +www.google.com/search?q=Markup+(business) + +(www.google.com/search?q=Markup+(business)) + +### Example 601 + +www.google.com/search?q=(business))+ok + +### Example 602 + +www.google.com/search?q=commonmark&hl=en + +www.google.com/search?q=commonmark& + +### Example 603 + +< immediately ends an autolink. + +www.commonmark.org/he<svg/onload="alert(1)"//@x

+

<<svg/onload="alert(1)"//@x>

-

bar"onclick="alert('XSS')"@foo

+

<bar"onclick="alert('XSS')"@foo>