From 8370126edffc009e5dea3845f6cce8163d1cf727 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 29 May 2021 19:46:41 -0400 Subject: [PATCH 1/7] clean up grammar a bit, add more regex sigil examples --- src/languages/elixir.js | 167 ++++++++++++++------------- test/markup/elixir/sigils.expect.txt | 9 ++ test/markup/elixir/sigils.txt | 9 ++ 3 files changed, 103 insertions(+), 82 deletions(-) diff --git a/src/languages/elixir.js b/src/languages/elixir.js index 0a10aacb46..b55cc4cad5 100644 --- a/src/languages/elixir.js +++ b/src/languages/elixir.js @@ -10,17 +10,53 @@ Website: https://elixir-lang.org export default function(hljs) { const ELIXIR_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_.]*(!|\\?)?'; const ELIXIR_METHOD_RE = '[a-zA-Z_]\\w*[!?=]?|[-+~]@|<<|>>|=~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~`|]|\\[\\]=?'; - const ELIXIR_KEYWORDS = { + const KEYWORDS = [ + "alias", + "alias", + "and", + "begin", + "break", + "case", + "cond", + "defined", + "do", + "end", + "ensure", + "false", + "fn", + "for", + "import", + "in", + "include", + "module", + "next", + "nil", + "not", + "or", + "quote", + "redo", + "require", + "retry", + "return", + "self", + "then", + "true", + "unless", + "until", + "use", + "when", + "while", + "with|0" + ]; + const KWS = { $pattern: ELIXIR_IDENT_RE, - keyword: 'and false then defined module in return redo retry end for true self when ' + - 'next until do begin unless nil break not case cond alias while ensure or ' + - 'include use alias fn quote require import with|0' + keyword: KEYWORDS }; const SUBST = { className: 'subst', begin: /#\{/, end: /\}/, - keywords: ELIXIR_KEYWORDS + keywords: KWS }; const NUMBER = { className: 'number', @@ -28,54 +64,50 @@ export default function(hljs) { relevance: 0 }; const SIGIL_DELIMITERS = '[/|([{<"\']'; + const SIGIL_DELIMITER_MODES = [ + { + begin: /"/, + end: /"/ + }, + { + begin: /'/, + end: /'/ + }, + { + begin: /\//, + end: /\// + }, + { + begin: /\|/, + end: /\|/ + }, + { + begin: /\(/, + end: /\)/ + }, + { + 
begin: /\[/, + end: /\]/ + }, + { + begin: /\{/, + end: /\}/ + }, + { + begin: // + } + ]; const LOWERCASE_SIGIL = { className: 'string', begin: '~[a-z]' + '(?=' + SIGIL_DELIMITERS + ')', contains: [ { - endsParent: true, contains: [ - { - contains: [ - hljs.BACKSLASH_ESCAPE, - SUBST - ], - variants: [ - { - begin: /"/, - end: /"/ - }, - { - begin: /'/, - end: /'/ - }, - { - begin: /\//, - end: /\// - }, - { - begin: /\|/, - end: /\|/ - }, - { - begin: /\(/, - end: /\)/ - }, - { - begin: /\[/, - end: /\]/ - }, - { - begin: /\{/, - end: /\}/ - }, - { - begin: // - } - ] - } - ] + hljs.BACKSLASH_ESCAPE, + SUBST + ], + variants: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x)) } ] }; @@ -83,40 +115,7 @@ export default function(hljs) { const UPCASE_SIGIL = { className: 'string', begin: '~[A-Z]' + '(?=' + SIGIL_DELIMITERS + ')', - contains: [ - { - begin: /"/, - end: /"/ - }, - { - begin: /'/, - end: /'/ - }, - { - begin: /\//, - end: /\// - }, - { - begin: /\|/, - end: /\|/ - }, - { - begin: /\(/, - end: /\)/ - }, - { - begin: /\[/, - end: /\]/ - }, - { - begin: /\{/, - end: /\}/ - }, - { - begin: // - } - ] + contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x)) }; const STRING = { @@ -225,6 +224,10 @@ export default function(hljs) { relevance: 0, contains: [NUMBER] }, + { + scope: "number", + match: /&[a-z][a-z_]+\??\/\d+/ + }, { className: 'regexp', illegal: '\\n', @@ -251,7 +254,7 @@ export default function(hljs) { return { name: 'Elixir', - keywords: ELIXIR_KEYWORDS, + keywords: KWS, contains: ELIXIR_DEFAULT_CONTAINS }; } diff --git a/test/markup/elixir/sigils.expect.txt b/test/markup/elixir/sigils.expect.txt index e9e194d493..e7c02d30b8 100644 --- a/test/markup/elixir/sigils.expect.txt +++ b/test/markup/elixir/sigils.expect.txt @@ -21,3 +21,12 @@ ~s[hello #{name}] ~s{hello #{name}} ~s<hello #{name}> + +~r/hello/ +~r|hello| +~r"hello" +~r'hello' +~r(hello) +~r[hello] +~r{hello} +~r<hello> diff --git a/test/markup/elixir/sigils.txt 
b/test/markup/elixir/sigils.txt index 7547ecd377..29a8d780a1 100644 --- a/test/markup/elixir/sigils.txt +++ b/test/markup/elixir/sigils.txt @@ -21,3 +21,12 @@ ~s[hello #{name}] ~s{hello #{name}} ~s + +~r/hello/ +~r|hello| +~r"hello" +~r'hello' +~r(hello) +~r[hello] +~r{hello} +~r From 6ac21f15a7856248fcac494abbace8dc72427147 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 29 May 2021 19:56:23 -0400 Subject: [PATCH 2/7] add regex sigils that classify as `regex` --- src/languages/elixir.js | 22 ++++++++++++++++++++++ test/markup/elixir/sigils.expect.txt | 21 ++++++++++++--------- test/markup/elixir/sigils.txt | 3 +++ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/languages/elixir.js b/src/languages/elixir.js index b55cc4cad5..6da5d955e2 100644 --- a/src/languages/elixir.js +++ b/src/languages/elixir.js @@ -118,6 +118,27 @@ export default function(hljs) { contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x)) }; + const REGEX_SIGIL = { + className: 'regex', + variants: [ + { + begin: '~r' + '(?=' + SIGIL_DELIMITERS + ')', + contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, + { + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] + } + )) + }, + { + begin: '~R' + '(?=' + SIGIL_DELIMITERS + ')', + contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x)) + } + ] + }; + const STRING = { className: 'string', contains: [ @@ -181,6 +202,7 @@ export default function(hljs) { }); const ELIXIR_DEFAULT_CONTAINS = [ STRING, + REGEX_SIGIL, UPCASE_SIGIL, LOWERCASE_SIGIL, hljs.HASH_COMMENT_MODE, diff --git a/test/markup/elixir/sigils.expect.txt b/test/markup/elixir/sigils.expect.txt index e7c02d30b8..81935069d4 100644 --- a/test/markup/elixir/sigils.expect.txt +++ b/test/markup/elixir/sigils.expect.txt @@ -1,9 +1,12 @@ -~R'this + i\s "a" regex too' +~R'this + i\s "a" regex too' ~w(hello #{ ["has" <> "123", '\c\d', "\123 interpol" | []] } world)s ~W(hello #{no "123" \c\d \123 interpol} world)s ~s{Escapes terminators \{ and \}, but no {balancing} # 
outside of sigil here } ~S"No escapes \s\t\n and no #{interpolation}" +~S(No escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0) +~s(Plenty of escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0) + ~S/hello/ ~S|hello| ~S"hello" @@ -22,11 +25,11 @@ ~s{hello #{name}} ~s<hello #{name}> -~r/hello/ -~r|hello| -~r"hello" -~r'hello' -~r(hello) -~r[hello] -~r{hello} -~r<hello> +~r/hello/ +~r|hello| +~r"hello" +~r'hello' +~r(hello) +~r[hello] +~r{hello} +~r<hello> diff --git a/test/markup/elixir/sigils.txt b/test/markup/elixir/sigils.txt index 29a8d780a1..3655e78691 100644 --- a/test/markup/elixir/sigils.txt +++ b/test/markup/elixir/sigils.txt @@ -4,6 +4,9 @@ ~s{Escapes terminators \{ and \}, but no {balancing} # outside of sigil here } ~S"No escapes \s\t\n and no #{interpolation}" +~S(No escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0) +~s(Plenty of escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0) + ~S/hello/ ~S|hello| ~S"hello" From b1c280d0406e412df6d77baba376b5eb551e9d18 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 29 May 2021 20:14:02 -0400 Subject: [PATCH 3/7] add char.escape --- src/languages/elixir.js | 23 +++++++++++++++++------ test/markup/elixir/sigils.expect.txt | 4 ++-- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/languages/elixir.js b/src/languages/elixir.js index 6da5d955e2..1f2a584820 100644 --- a/src/languages/elixir.js +++ b/src/languages/elixir.js @@ -63,6 +63,18 @@ export default function(hljs) { begin: '(\\b0o[0-7_]+)|(\\b0b[01_]+)|(\\b0x[0-9a-fA-F_]+)|(-?\\b[1-9][0-9_]*(\\.[0-9_]+([eE][-+]?[0-9]+)?)?)', relevance: 0 }; + // TODO: could be tightened + // https://elixir-lang.readthedocs.io/en/latest/intro/18.html + // but you also need to include closing delimiters in the escape list per + // individual sigil mode from what I can tell, + // ie: \} might or might not be an escape depending on the sigil used + const ESCAPES_RE = /\\[\s\S]/; + // const ESCAPES_RE = /\\["'\\abdefnrstv0]/; + const BACKSLASH_ESCAPE = { + match: 
ESCAPES_RE, + scope: "char.escape", + relevance: 0 + }; const SIGIL_DELIMITERS = '[/|([{<"\']'; const SIGIL_DELIMITER_MODES = [ { @@ -101,15 +113,14 @@ export default function(hljs) { const LOWERCASE_SIGIL = { className: 'string', begin: '~[a-z]' + '(?=' + SIGIL_DELIMITERS + ')', - contains: [ + contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, { contains: [ - hljs.BACKSLASH_ESCAPE, + BACKSLASH_ESCAPE, SUBST - ], - variants: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x)) + ] } - ] + )) }; const UPCASE_SIGIL = { @@ -126,7 +137,7 @@ export default function(hljs) { contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, { contains: [ - hljs.BACKSLASH_ESCAPE, + BACKSLASH_ESCAPE, SUBST ] } diff --git a/test/markup/elixir/sigils.expect.txt b/test/markup/elixir/sigils.expect.txt index 81935069d4..915126bac8 100644 --- a/test/markup/elixir/sigils.expect.txt +++ b/test/markup/elixir/sigils.expect.txt @@ -1,11 +1,11 @@ ~R'this + i\s "a" regex too' ~w(hello #{ ["has" <> "123", '\c\d', "\123 interpol" | []] } world)s ~W(hello #{no "123" \c\d \123 interpol} world)s -~s{Escapes terminators \{ and \}, but no {balancing} # outside of sigil here } +~s{Escapes terminators \{ and \}, but no {balancing} # outside of sigil here } ~S"No escapes \s\t\n and no #{interpolation}" ~S(No escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0) -~s(Plenty of escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0) +~s(Plenty of escapes \" \' \\ \a \b \d \e \f \n \r \s \t \v \0) ~S/hello/ ~S|hello| From ca0e64c6bf87333103f3719d2b9568e6cf213036 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 29 May 2021 20:26:27 -0400 Subject: [PATCH 4/7] remove dead code --- src/languages/elixir.js | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/src/languages/elixir.js b/src/languages/elixir.js index 1f2a584820..57e40fba63 100644 --- a/src/languages/elixir.js +++ b/src/languages/elixir.js @@ -245,42 +245,6 @@ export default function(hljs) { }, { begin: '->' - }, - { // 
regexp container - begin: '(' + hljs.RE_STARTERS_RE + ')\\s*', - contains: [ - hljs.HASH_COMMENT_MODE, - { - // to prevent false regex triggers for the division function: - // /: - begin: /\/: (?=\d+\s*[,\]])/, - relevance: 0, - contains: [NUMBER] - }, - { - scope: "number", - match: /&[a-z][a-z_]+\??\/\d+/ - }, - { - className: 'regexp', - illegal: '\\n', - contains: [ - hljs.BACKSLASH_ESCAPE, - SUBST - ], - variants: [ - { - begin: '/', - end: '/[a-z]*' - }, - { - begin: '%r\\[', - end: '\\][a-z]*' - } - ] - } - ], - relevance: 0 } ]; SUBST.contains = ELIXIR_DEFAULT_CONTAINS; From df7e30d627709cc5674fed34cc8ecc26426136d4 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 31 May 2021 08:25:23 -0400 Subject: [PATCH 5/7] regex modifiers --- src/languages/elixir.js | 9 ++++++++- test/markup/elixir/sigils.expect.txt | 17 +++++++++++------ test/markup/elixir/sigils.txt | 17 +++++++++++------ 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/languages/elixir.js b/src/languages/elixir.js index 57e40fba63..f41f7c40f8 100644 --- a/src/languages/elixir.js +++ b/src/languages/elixir.js @@ -6,6 +6,8 @@ Category: functional Website: https://elixir-lang.org */ +import * as regex from '../lib/regex.js'; + /** @type LanguageFn */ export default function(hljs) { const ELIXIR_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_.]*(!|\\?)?'; @@ -136,6 +138,7 @@ export default function(hljs) { begin: '~r' + '(?=' + SIGIL_DELIMITERS + ')', contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, { + end: regex.concat(x.end, /[uismxfU]{0,7}/), contains: [ BACKSLASH_ESCAPE, SUBST @@ -145,7 +148,11 @@ export default function(hljs) { }, { begin: '~R' + '(?=' + SIGIL_DELIMITERS + ')', - contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x)) + contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, + { + end: regex.concat(x.end, /[uismxfU]{0,7}/) + }) + ) } ] }; diff --git a/test/markup/elixir/sigils.expect.txt b/test/markup/elixir/sigils.expect.txt index 915126bac8..2e97c478cc 100644 
--- a/test/markup/elixir/sigils.expect.txt +++ b/test/markup/elixir/sigils.expect.txt @@ -26,10 +26,15 @@ ~s<hello #{name}> ~r/hello/ -~r|hello| -~r"hello" -~r'hello' -~r(hello) -~r[hello] -~r{hello} +~r|hello|u +~r"hello"i +~r'hello'm +~r(hello)x +~r[hello]f +~r{hello}U ~r<hello> + +~r<regex here>uismxfU +~r/regex here/uismxfU +~R<regex here>uismxfU +~R/regex here/uismxfU diff --git a/test/markup/elixir/sigils.txt b/test/markup/elixir/sigils.txt index 3655e78691..d3863f71ee 100644 --- a/test/markup/elixir/sigils.txt +++ b/test/markup/elixir/sigils.txt @@ -26,10 +26,15 @@ ~s ~r/hello/ -~r|hello| -~r"hello" -~r'hello' -~r(hello) -~r[hello] -~r{hello} +~r|hello|u +~r"hello"i +~r'hello'm +~r(hello)x +~r[hello]f +~r{hello}U ~r + +~ruismxfU +~r/regex here/uismxfU +~RuismxfU +~R/regex here/uismxfU From 9e1db7a35dd94c7036c2daf0deb285067b37a911 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 31 May 2021 12:15:13 -0400 Subject: [PATCH 6/7] properly escape the end of sigils --- src/languages/elixir.js | 18 ++++++++++++++++-- test/markup/elixir/sigils.expect.txt | 9 +++++++++ test/markup/elixir/sigils.txt | 9 +++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/languages/elixir.js b/src/languages/elixir.js index f41f7c40f8..6c5dfa46c3 100644 --- a/src/languages/elixir.js +++ b/src/languages/elixir.js @@ -112,12 +112,20 @@ export default function(hljs) { end: />/ } ]; + const escapeSigilEnd = (end) => { + return { + scope: "char.escape", + begin: regex.concat(/\\/, end), + relevance: 0 + }; + }; const LOWERCASE_SIGIL = { className: 'string', begin: '~[a-z]' + '(?=' + SIGIL_DELIMITERS + ')', contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, { contains: [ + escapeSigilEnd(x.end), BACKSLASH_ESCAPE, SUBST ] @@ -128,7 +136,11 @@ export default function(hljs) { const UPCASE_SIGIL = { className: 'string', begin: '~[A-Z]' + '(?=' + SIGIL_DELIMITERS + ')', - contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x)) + contains: 
SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, + { + contains: [ escapeSigilEnd(x.end) ] + } + )) }; const REGEX_SIGIL = { @@ -140,6 +152,7 @@ export default function(hljs) { { end: regex.concat(x.end, /[uismxfU]{0,7}/), contains: [ + escapeSigilEnd(x.end), BACKSLASH_ESCAPE, SUBST ] @@ -150,7 +163,8 @@ export default function(hljs) { begin: '~R' + '(?=' + SIGIL_DELIMITERS + ')', contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, { - end: regex.concat(x.end, /[uismxfU]{0,7}/) + end: regex.concat(x.end, /[uismxfU]{0,7}/), + contains: [ escapeSigilEnd(x.end) ] }) ) } diff --git a/test/markup/elixir/sigils.expect.txt b/test/markup/elixir/sigils.expect.txt index 2e97c478cc..1a3977e772 100644 --- a/test/markup/elixir/sigils.expect.txt +++ b/test/markup/elixir/sigils.expect.txt @@ -38,3 +38,12 @@ ~r/regex here/uismxfU ~R<regex here>uismxfU ~R/regex here/uismxfU + +~r|foo\|bar| +~R|foo\|bar| + +~r(hello( there\)*!)u +~R(hello( there\)*!)u + +~s|foo\|bar| +~S|foo\|bar| diff --git a/test/markup/elixir/sigils.txt b/test/markup/elixir/sigils.txt index d3863f71ee..462d9c556a 100644 --- a/test/markup/elixir/sigils.txt +++ b/test/markup/elixir/sigils.txt @@ -38,3 +38,12 @@ ~r/regex here/uismxfU ~RuismxfU ~R/regex here/uismxfU + +~r|foo\|bar| +~R|foo\|bar| + +~r(hello( there\)*!)u +~R(hello( there\)*!)u + +~s|foo\|bar| +~S|foo\|bar| From 03b87b6500cb97e95f1b95cfebc7967cc3ee7d6c Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 1 Jun 2021 22:32:40 -0400 Subject: [PATCH 7/7] changelog [no ci] --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 4efadc8474..0b11c531fe 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,9 +3,10 @@ Grammars: - enh(clojure) added `edn` alias (#3213) [Stel Abrego][] +- enh(elixir) much improved regular expression sigil support (#3207) [Josh Goebel][] [Stel Abrego]: https://github.com/stelcodes - +[Josh Goebel]: https://github.com/joshgoebel ## Version 11.0.0