-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(elixir) fix regular expression detection #3207
Changes from all commits
8370126
6ac21f1
b1c280d
ca0e64c
df7e30d
9e1db7a
541034b
03b87b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,115 +6,167 @@ Category: functional | |
Website: https://elixir-lang.org | ||
*/ | ||
|
||
import * as regex from '../lib/regex.js'; | ||
|
||
/** @type LanguageFn */ | ||
export default function(hljs) { | ||
const ELIXIR_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_.]*(!|\\?)?'; | ||
const ELIXIR_METHOD_RE = '[a-zA-Z_]\\w*[!?=]?|[-+~]@|<<|>>|=~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~`|]|\\[\\]=?'; | ||
const ELIXIR_KEYWORDS = { | ||
const KEYWORDS = [ | ||
"alias", | ||
"alias", | ||
"and", | ||
"begin", | ||
"break", | ||
"case", | ||
"cond", | ||
"defined", | ||
"do", | ||
"end", | ||
"ensure", | ||
"false", | ||
"fn", | ||
"for", | ||
"import", | ||
"in", | ||
"include", | ||
"module", | ||
"next", | ||
"nil", | ||
"not", | ||
"or", | ||
"quote", | ||
"redo", | ||
"require", | ||
"retry", | ||
"return", | ||
"self", | ||
"then", | ||
"true", | ||
"unless", | ||
"until", | ||
"use", | ||
"when", | ||
"while", | ||
"with|0" | ||
]; | ||
const KWS = { | ||
$pattern: ELIXIR_IDENT_RE, | ||
keyword: 'and false then defined module in return redo retry end for true self when ' + | ||
'next until do begin unless nil break not case cond alias while ensure or ' + | ||
'include use alias fn quote require import with|0' | ||
keyword: KEYWORDS | ||
}; | ||
const SUBST = { | ||
className: 'subst', | ||
begin: /#\{/, | ||
end: /\}/, | ||
keywords: ELIXIR_KEYWORDS | ||
keywords: KWS | ||
}; | ||
const NUMBER = { | ||
className: 'number', | ||
begin: '(\\b0o[0-7_]+)|(\\b0b[01_]+)|(\\b0x[0-9a-fA-F_]+)|(-?\\b[1-9][0-9_]*(\\.[0-9_]+([eE][-+]?[0-9]+)?)?)', | ||
relevance: 0 | ||
}; | ||
// TODO: could be tightened | ||
// https://elixir-lang.readthedocs.io/en/latest/intro/18.html | ||
// but you also need to include closing delimiters in the escape list per | ||
// individual sigil mode from what I can tell, | ||
// ie: \} might or might not be an escape depending on the sigil used | ||
const ESCAPES_RE = /\\[\s\S]/; | ||
// const ESCAPES_RE = /\\["'\\abdefnrstv0]/; | ||
const BACKSLASH_ESCAPE = { | ||
match: ESCAPES_RE, | ||
scope: "char.escape", | ||
relevance: 0 | ||
}; | ||
const SIGIL_DELIMITERS = '[/|([{<"\']'; | ||
const SIGIL_DELIMITER_MODES = [ | ||
{ | ||
begin: /"/, | ||
end: /"/ | ||
}, | ||
{ | ||
begin: /'/, | ||
end: /'/ | ||
}, | ||
{ | ||
begin: /\//, | ||
end: /\// | ||
}, | ||
{ | ||
begin: /\|/, | ||
end: /\|/ | ||
}, | ||
{ | ||
begin: /\(/, | ||
end: /\)/ | ||
}, | ||
{ | ||
begin: /\[/, | ||
end: /\]/ | ||
}, | ||
{ | ||
begin: /\{/, | ||
end: /\}/ | ||
}, | ||
{ | ||
begin: /</, | ||
end: />/ | ||
} | ||
]; | ||
const escapeSigilEnd = (end) => { | ||
return { | ||
scope: "char.escape", | ||
begin: regex.concat(/\\/, end), | ||
relevance: 0 | ||
}; | ||
}; | ||
const LOWERCASE_SIGIL = { | ||
className: 'string', | ||
begin: '~[a-z]' + '(?=' + SIGIL_DELIMITERS + ')', | ||
contains: [ | ||
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, | ||
{ | ||
endsParent: true, | ||
contains: [ | ||
{ | ||
contains: [ | ||
hljs.BACKSLASH_ESCAPE, | ||
SUBST | ||
], | ||
variants: [ | ||
{ | ||
begin: /"/, | ||
end: /"/ | ||
}, | ||
{ | ||
begin: /'/, | ||
end: /'/ | ||
}, | ||
{ | ||
begin: /\//, | ||
end: /\// | ||
}, | ||
{ | ||
begin: /\|/, | ||
end: /\|/ | ||
}, | ||
{ | ||
begin: /\(/, | ||
end: /\)/ | ||
}, | ||
{ | ||
begin: /\[/, | ||
end: /\]/ | ||
}, | ||
{ | ||
begin: /\{/, | ||
end: /\}/ | ||
}, | ||
{ | ||
begin: /</, | ||
end: />/ | ||
} | ||
] | ||
} | ||
escapeSigilEnd(x.end), | ||
BACKSLASH_ESCAPE, | ||
SUBST | ||
] | ||
} | ||
] | ||
)) | ||
}; | ||
|
||
const UPCASE_SIGIL = { | ||
className: 'string', | ||
begin: '~[A-Z]' + '(?=' + SIGIL_DELIMITERS + ')', | ||
contains: [ | ||
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, | ||
{ | ||
begin: /"/, | ||
end: /"/ | ||
}, | ||
{ | ||
begin: /'/, | ||
end: /'/ | ||
}, | ||
{ | ||
begin: /\//, | ||
end: /\// | ||
}, | ||
{ | ||
begin: /\|/, | ||
end: /\|/ | ||
}, | ||
{ | ||
begin: /\(/, | ||
end: /\)/ | ||
}, | ||
{ | ||
begin: /\[/, | ||
end: /\]/ | ||
}, | ||
contains: [ escapeSigilEnd(x.end) ] | ||
} | ||
)) | ||
}; | ||
|
||
const REGEX_SIGIL = { | ||
className: 'regex', | ||
variants: [ | ||
{ | ||
begin: /\{/, | ||
end: /\}/ | ||
begin: '~r' + '(?=' + SIGIL_DELIMITERS + ')', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this also mark regex modifiers as part of the regex? E.g.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, but we should add that. What are all the valid modifiers in Elixir? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for sigils
for sigils:
Treating any group of lower and/or uppercase letters immediately after the closing delimiter as a modifier and thus part of the sigil could also be enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added modifiers to ~r/~R. I'll save There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
At the moment yes, but the language syntax already allows for any sigil to have any modifier that it wants. That actually makes me realize that anyone can define their own custom sigil with their own modifiers 😅. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Problems for another day. :) Does the PR look workable for now you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find it hard to judge the code, but from playing around with For example for those two pairs the output HTML should literally only differ by a single letter (
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Because I thought uppercase sigils didn't allow escaping. I guess escaping the end sigil character is the exception to the rule?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yes, that is the exception. Quoting the docs about the sigil R:
|
||
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, | ||
{ | ||
end: regex.concat(x.end, /[uismxfU]{0,7}/), | ||
contains: [ | ||
escapeSigilEnd(x.end), | ||
BACKSLASH_ESCAPE, | ||
SUBST | ||
] | ||
} | ||
)) | ||
}, | ||
{ | ||
begin: /</, | ||
end: />/ | ||
begin: '~R' + '(?=' + SIGIL_DELIMITERS + ')', | ||
contains: SIGIL_DELIMITER_MODES.map(x => hljs.inherit(x, | ||
{ | ||
end: regex.concat(x.end, /[uismxfU]{0,7}/), | ||
contains: [ escapeSigilEnd(x.end) ] | ||
}) | ||
) | ||
} | ||
] | ||
}; | ||
|
@@ -182,6 +234,7 @@ export default function(hljs) { | |
}); | ||
const ELIXIR_DEFAULT_CONTAINS = [ | ||
STRING, | ||
REGEX_SIGIL, | ||
UPCASE_SIGIL, | ||
LOWERCASE_SIGIL, | ||
hljs.HASH_COMMENT_MODE, | ||
|
@@ -213,45 +266,13 @@ export default function(hljs) { | |
}, | ||
{ | ||
begin: '->' | ||
}, | ||
{ // regexp container | ||
begin: '(' + hljs.RE_STARTERS_RE + ')\\s*', | ||
contains: [ | ||
hljs.HASH_COMMENT_MODE, | ||
{ | ||
// to prevent false regex triggers for the division function: | ||
// /: | ||
begin: /\/: (?=\d+\s*[,\]])/, | ||
relevance: 0, | ||
contains: [NUMBER] | ||
}, | ||
{ | ||
className: 'regexp', | ||
illegal: '\\n', | ||
contains: [ | ||
hljs.BACKSLASH_ESCAPE, | ||
SUBST | ||
], | ||
variants: [ | ||
{ | ||
begin: '/', | ||
end: '/[a-z]*' | ||
}, | ||
{ | ||
begin: '%r\\[', | ||
end: '\\][a-z]*' | ||
} | ||
] | ||
} | ||
], | ||
relevance: 0 | ||
} | ||
]; | ||
SUBST.contains = ELIXIR_DEFAULT_CONTAINS; | ||
|
||
return { | ||
name: 'Elixir', | ||
keywords: ELIXIR_KEYWORDS, | ||
keywords: KWS, | ||
contains: ELIXIR_DEFAULT_CONTAINS | ||
}; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know that this PR merely reformats the list, but it caught my attention because there are a lot of keywords in it that I have never seen in Elixir. Also, let me start by saying that I am not sure if there is a formal definition of a "keyword". I'm using that term rather intuitively.
As far as I know, those are not Elixir keywords at all:
- `begin`
- `break` (`Inspect.Algebra.break/1` is a function in the standard lib, but it has nothing to do with breaking out of loops like it might do in other languages)
- `defined`
- `ensure`
- `include`
- `module`
- `next` (`OptionParser.next/2` is a function in the standard lib, it has nothing to do with skipping to another iteration in a loop like it might do in other languages)
- `redo`
- `retry`
- `return`
- `until`
- `while`
I'm unsure about:
- `self`. It exists, but IMO it is a normal macro, not a keyword. I wouldn't expect it to be colored differently than other normal macros (like `rem`, `round`, `trunc` etc).
- `then`. It's a new macro added in Elixir 1.12 together with `tap`, I don't think those are keywords.
- `with|0`. I'm not sure what the pipe and zero mean here. `with` on its own is definitely a keyword.

Missing IMO:
- `case`, `cond`, and `unless` qualify as keywords, `if` and `else` should too.
- `quote` qualifies as a keyword, `unquote` and `unquote_splicing` should too.
- `receive`/`after` https://hexdocs.pm/elixir/Kernel.SpecialForms.html#receive/1
- `try`/`rescue`/`catch`/`after` https://hexdocs.pm/elixir/Kernel.SpecialForms.html#try/1
- `raise` https://hexdocs.pm/elixir/Kernel.html#raise/1
- `reraise` https://hexdocs.pm/elixir/Kernel.html#reraise/2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`alias` is also listed twice in the list.