From 8defb26516e72915ba38f74286b2ddce2cf99aa5 Mon Sep 17 00:00:00 2001 From: Sergey Mashkov Date: Sun, 6 Aug 2017 02:09:32 +0300 Subject: [PATCH 01/11] Kotlin: initial scanner implementation --- lib/coderay/helpers/file_type.rb | 2 + lib/coderay/scanners/kotlin.rb | 163 +++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 lib/coderay/scanners/kotlin.rb diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 7de34d58..09819cca 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -97,6 +97,8 @@ def type_from_shebang filename 'java' => :java, 'js' => :java_script, 'json' => :json, + 'kt' => :kotlin, + 'kts' => :kotlin, 'lua' => :lua, 'mab' => :ruby, 'pas' => :delphi, diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb new file mode 100644 index 00000000..aca3b3d7 --- /dev/null +++ b/lib/coderay/scanners/kotlin.rb @@ -0,0 +1,163 @@ +module CodeRay + module Scanners + + load :java + + class Kotlin < Java + + register_for :kotlin + file_extension 'kt' + + KOTLIN_KEYWORDS = %w[ + package import + as? as is + val var + class interface object fun init get set + in out + if when else for while do return break continue + ] + + KOTLIN_MODIFIERS = %w[ + annotation enum data sealed companion + abstract open final + public protected private internal + inline suspend + inner + ] + + TYPES = %w[ + Boolean Byte Char class Double Float Int Long Short Unit Nothing Any + ] + + STRING_CONTENT_PATTERN = { + "'" => /[^\\'$]+/, + '"' => /[^\\"$]+/, + } # :nodoc:s + + IDENT_KIND = Java::IDENT_KIND.dup. + add(TYPES, :type). + add(KOTLIN_KEYWORDS, :keyword). + add(KOTLIN_MODIFIERS, :keyword) # :nodoc: + + + def setup + @state = :initial + end + + def scan_tokens encoder, options + string_delimiter = nil + state = options[:state] || @state + last_token_dot = false + class_name_follows = false + delimiters = [] + + until eos? + + case state + + when :initial + if (match = scan(/ \s+ | \\\n /x)) + encoder.text_token match, :space + next + elsif (match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)) + encoder.text_token match, :comment + next + elsif (match = scan(/ TODO \( /ox)) + encoder.text_token "TODO", :comment + encoder.text_token "(", :operator + elsif (match = scan(/ #{IDENT} /ox)) + kind = IDENT_KIND[match] + if last_token_dot + kind = :ident + elsif class_name_follows + kind = :class + class_name_follows = false + else + case match + when 'import' + package_name_expected = :include + when 'package' + package_name_expected = :namespace + when 'class', 'interface' + class_name_follows = true + else + # nothing + end + end + encoder.text_token match, kind + elsif (match = scan(/ \.(?!\d) | [,?:()\[\]] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? /x)) + encoder.text_token match, :operator + elsif (match = scan(/{/)) + class_name_follows = false + encoder.text_token match, :operator + elsif (match = scan(/}/)) + encoder.text_token match, :operator + + unless delimiters.empty? + string_delimiter = delimiters.pop + encoder.end_group state + state = :string + end + elsif (match = scan(/["']/)) + state = :string + encoder.begin_group state + string_delimiter = match + encoder.text_token match, :delimiter + elsif check(/[\d.]/) + if (match = scan(/0[xX][0-9A-Fa-f]+/)) + encoder.text_token match, :hex + elsif (match = scan(/(?>0[0-7]+)(?![89.eEfF])/)) + encoder.text_token match, :octal + elsif (match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)) + encoder.text_token match, :float + elsif (match = scan(/\d+[lL]?/)) + encoder.text_token match, :integer + end + + elsif (match = scan(/ @ #{IDENT} /ox)) + encoder.text_token match, :annotation + + else + encoder.text_token getch, :error + end + + when :string + if (match = scan(/\${/)) + encoder.text_token match, :operator + + state = :initial + encoder.begin_group state + + delimiters << string_delimiter + string_delimiter = nil + elsif (match = scan(/ \$ #{IDENT} /ox)) + encoder.text_token match, :ident + elsif (match = scan(STRING_CONTENT_PATTERN[string_delimiter])) + encoder.text_token match, :content + elsif (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) + if string_delimiter == "'" && !(match == "\\\\" || match == "\\'") + encoder.text_token match, :content + else + encoder.text_token match, :char + end + elsif (match = scan(/["']/)) + encoder.text_token match, :delimiter + encoder.end_group state + state = :initial + string_delimiter = nil + elsif (match = scan(/ \\ | $ /x)) + encoder.end_group state + state = :initial + encoder.text_token match, :error unless match.empty? + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + else + raise_inspect 'Unknown state', encoder + end + + end + end + end + end +end From 63c5556ab6a9c6bca053275ca4fb876cac88fa4e Mon Sep 17 00:00:00 2001 From: Sergey Mashkov Date: Sun, 6 Aug 2017 02:09:52 +0300 Subject: [PATCH 02/11] Kotlin: prepare smoke test --- test/executable/source.kt | 31 +++++++++++++++++++++++++++++++ test/executable/suite.rb | 21 +++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 test/executable/source.kt diff --git a/test/executable/source.kt b/test/executable/source.kt new file mode 100644 index 00000000..4bcaa378 --- /dev/null +++ b/test/executable/source.kt @@ -0,0 +1,31 @@ +class Test { + val text1: String = "abc \' def \u0000 \n \" hehe " + val text2: List = listOf('a', '\n', '\'', '"', '\"') + val numbers = listOf(0, 12, 1.0f, 1.0, 1L, 0x1f, -1, -12, -1.0f, -1.0, -1L) + + val template = "abc${1 + "b"}def" + val template2 = "abc${1 + 'b'}def" + val template3 = "abc $var def" + + var v: Int = 0 + + fun function(): ReturnType { + } + + fun parametrizedFunction(): T = TODO() + + fun references() { + super.references() + this.references() + } + + @Annotation + class Annotated + + inner class Inner + + object O + + companion object { + } +} \ No newline at end of file diff --git a/test/executable/suite.rb b/test/executable/suite.rb index 997405ca..f490a497 100644 --- a/test/executable/suite.rb +++ b/test/executable/suite.rb @@ -194,6 +194,27 @@ def coderay args, options = {} end end + context 'highlighting a file without explicit input type (source.kt)' do + source_file = ROOT_DIR + 'test/executable/source.kt' + command = "#{source_file} -html" + + source = File.read source_file + + pre = %r{
(.*?)
}m + tag_class = /]*>/ + + should 'generate json' do + target = coderay("#{source_file} #{source_file}.json") + target = coderay("#{source_file} #{source_file}.html") + end + + # should 'respect the file extension and highlight the input as Kotlin' do + # target = coderay(command) + # assert_equal %w(keyword class), target[pre, 1].scan(tag_class).flatten + # end + + end + context 'highlighting a file with explicit input and output type (-ruby source.py -span)' do source_file = ROOT_DIR + 'test/executable/source.py' command = "-ruby #{source_file} -span" From 10ae5207bcc064abc3c7bd9200c23233f3d27247 Mon Sep 17 00:00:00 2001 From: Sergey Mashkov Date: Sun, 6 Aug 2017 22:42:13 +0300 Subject: [PATCH 03/11] Kotlin: implement multiline strings, fix nested braces --- lib/coderay/scanners/kotlin.rb | 41 ++++++++++++++++++++++++++++++---- test/executable/source.kt | 7 ++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index aca3b3d7..40cd3740 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -50,6 +50,7 @@ def scan_tokens encoder, options last_token_dot = false class_name_follows = false delimiters = [] + states = [] until eos? @@ -90,14 +91,22 @@ def scan_tokens encoder, options elsif (match = scan(/{/)) class_name_follows = false encoder.text_token match, :operator + states << :initial elsif (match = scan(/}/)) encoder.text_token match, :operator - unless delimiters.empty? - string_delimiter = delimiters.pop - encoder.end_group state - state = :string + unless states.empty? + state = states.pop + + if state == :string || state == :multiline_string + string_delimiter = delimiters.pop + encoder.end_group :initial + end end + elsif (match = scan(/"""/)) + state = :multiline_string + encoder.begin_group :string + encoder.text_token match, :delimiter elsif (match = scan(/["']/)) state = :string encoder.begin_group state @@ -129,6 +138,7 @@ def scan_tokens encoder, options encoder.begin_group state delimiters << string_delimiter + states << :string string_delimiter = nil elsif (match = scan(/ \$ #{IDENT} /ox)) encoder.text_token match, :ident @@ -152,6 +162,29 @@ def scan_tokens encoder, options else raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end + when :multiline_string + if (match = scan(/\${/)) + encoder.text_token match, :operator + + state = :initial + encoder.begin_group state + + delimiters << nil + states << :multiline_string + elsif (match = scan(/ \$ #{IDENT} /ox)) + encoder.text_token match, :ident + elsif (match = scan(/ [^$\\"]+ /x)) + encoder.text_token match, :content + elsif (match = scan(/"""/x)) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + string_delimiter = nil + elsif (match = scan(/"/)) + encoder.text_token match, :content + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end else raise_inspect 'Unknown state', encoder end diff --git a/test/executable/source.kt b/test/executable/source.kt index 4bcaa378..34332dea 100644 --- a/test/executable/source.kt +++ b/test/executable/source.kt @@ -7,6 +7,13 @@ class Test { val template2 = "abc${1 + 'b'}def" val template3 = "abc $var def" + val multiline = """ first line $var ${1 + 1} + second line + and quotes: ' " '' "" ok + """ + + val innerBraaces = " before ${ if (true) { 1 } else { 2 } }" + var v: Int = 0 fun function(): ReturnType { From b1fbdab4938776dfe44df9895c445ea154f2f4f7 Mon Sep 17 00:00:00 2001 From: Sergey Mashkov Date: Sun, 6 Aug 2017 23:20:09 +0300 Subject: [PATCH 04/11] Kotlin: smoke test and expected test data --- .gitignore | 2 ++ test/executable/source.kt | 2 +- test/executable/source.kt.expected.json | 1 + test/executable/suite.rb | 17 ++++++++--------- 4 files changed, 12 insertions(+), 10 deletions(-) create mode 100644 test/executable/source.kt.expected.json diff --git a/.gitignore b/.gitignore index deed1a27..5c761439 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,7 @@ Gemfile.lock .ruby-version test/executable/source.rb.html test/executable/source.rb.json +test/executable/*.actual.html +test/executable/*.actual.json test/scanners old-stuff diff --git a/test/executable/source.kt b/test/executable/source.kt index 34332dea..290f9d6a 100644 --- a/test/executable/source.kt +++ b/test/executable/source.kt @@ -12,7 +12,7 @@ class Test { and quotes: ' " '' "" ok """ - val innerBraaces = " before ${ if (true) { 1 } else { 2 } }" + val innerBraces = " before ${ if (true) { 1 } else { 2 } }" var v: Int = 0 diff --git a/test/executable/source.kt.expected.json b/test/executable/source.kt.expected.json new file mode 100644 index 00000000..f4f2643b --- /dev/null +++ b/test/executable/source.kt.expected.json @@ -0,0 +1 @@ +[{"type":"text","text":"class","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"Test","kind":"class"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"{","kind":"operator"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"text1","kind":"ident"},{"type":"text","text":":","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"String","kind":"predefined_type"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"text","text":"abc ","kind":"content"},{"type":"text","text":"\\'","kind":"char"},{"type":"text","text":" def ","kind":"content"},{"type":"text","text":"\\u0000","kind":"char"},{"type":"text","text":" ","kind":"content"},{"type":"text","text":"\\n","kind":"char"},{"type":"text","text":" ","kind":"content"},{"type":"text","text":"\\\"","kind":"char"},{"type":"text","text":" hehe ","kind":"content"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"text2","kind":"ident"},{"type":"text","text":":","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"List","kind":"predefined_type"},{"type":"text","text":"<","kind":"operator"},{"type":"text","text":"Char","kind":"type"},{"type":"text","text":">","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"listOf","kind":"ident"},{"type":"text","text":"(","kind":"operator"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"'","kind":"delimiter"},{"type":"text","text":"a","kind":"content"},{"type":"text","text":"'","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"'","kind":"delimiter"},{"type":"text","text":"\\n","kind":"content"},{"type":"text","text":"'","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"'","kind":"delimiter"},{"type":"text","text":"\\'","kind":"char"},{"type":"text","text":"'","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"'","kind":"delimiter"},{"type":"text","text":"\"","kind":"content"},{"type":"text","text":"'","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"'","kind":"delimiter"},{"type":"text","text":"\\\"","kind":"content"},{"type":"text","text":"'","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"numbers","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"listOf","kind":"ident"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":"0","kind":"integer"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"12","kind":"integer"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"1.0f","kind":"float"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"1.0","kind":"float"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"1L","kind":"integer"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"0x1f","kind":"hex"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"-","kind":"operator"},{"type":"text","text":"1","kind":"integer"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"-","kind":"operator"},{"type":"text","text":"12","kind":"integer"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"-","kind":"operator"},{"type":"text","text":"1.0f","kind":"float"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"-","kind":"operator"},{"type":"text","text":"1.0","kind":"float"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"-","kind":"operator"},{"type":"text","text":"1L","kind":"integer"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"template","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"text","text":"abc","kind":"content"},{"type":"text","text":"${","kind":"operator"},{"type":"block","action":"open","kind":"initial"},{"type":"text","text":"1","kind":"integer"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"+","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"text","text":"b","kind":"content"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"}","kind":"operator"},{"type":"block","action":"close","kind":"initial"},{"type":"text","text":"def","kind":"content"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"template2","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"text","text":"abc","kind":"content"},{"type":"text","text":"${","kind":"operator"},{"type":"block","action":"open","kind":"initial"},{"type":"text","text":"1","kind":"integer"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"+","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"'","kind":"delimiter"},{"type":"text","text":"b","kind":"content"},{"type":"text","text":"'","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"}","kind":"operator"},{"type":"block","action":"close","kind":"initial"},{"type":"text","text":"def","kind":"content"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"template3","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"text","text":"abc ","kind":"content"},{"type":"text","text":"$var","kind":"ident"},{"type":"text","text":" def","kind":"content"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"multiline","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"\"\"\"","kind":"delimiter"},{"type":"text","text":" first line ","kind":"content"},{"type":"text","text":"$var","kind":"ident"},{"type":"text","text":" ","kind":"content"},{"type":"text","text":"${","kind":"operator"},{"type":"block","action":"open","kind":"initial"},{"type":"text","text":"1","kind":"integer"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"+","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"1","kind":"integer"},{"type":"text","text":"}","kind":"operator"},{"type":"block","action":"close","kind":"initial"},{"type":"text","text":"\n second line\n and quotes: ' ","kind":"content"},{"type":"text","text":"\"","kind":"content"},{"type":"text","text":" '' ","kind":"content"},{"type":"text","text":"\"","kind":"content"},{"type":"text","text":"\"","kind":"content"},{"type":"text","text":" ok\n ","kind":"content"},{"type":"text","text":"\"\"\"","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"val","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"innerBraces","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"block","action":"open","kind":"string"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"text","text":" before ","kind":"content"},{"type":"text","text":"${","kind":"operator"},{"type":"block","action":"open","kind":"initial"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"if","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":"true","kind":"predefined_constant"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"{","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"1","kind":"integer"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"}","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"else","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"{","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"2","kind":"integer"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"}","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"}","kind":"operator"},{"type":"block","action":"close","kind":"initial"},{"type":"text","text":"\"","kind":"delimiter"},{"type":"block","action":"close","kind":"string"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"var","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"v","kind":"ident"},{"type":"text","text":":","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"Int","kind":"type"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"0","kind":"integer"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"fun","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"function","kind":"ident"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":":","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"ReturnType","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"{","kind":"operator"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"}","kind":"operator"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"fun","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"<","kind":"operator"},{"type":"text","text":"T","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":":","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"Any","kind":"type"},{"type":"text","text":">","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"parametrizedFunction","kind":"ident"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":":","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"T","kind":"ident"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"=","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"TODO","kind":"comment"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"fun","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"references","kind":"ident"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"{","kind":"operator"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"super","kind":"local_variable"},{"type":"text","text":".","kind":"operator"},{"type":"text","text":"references","kind":"ident"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"this","kind":"local_variable"},{"type":"text","text":".","kind":"operator"},{"type":"text","text":"references","kind":"ident"},{"type":"text","text":"(","kind":"operator"},{"type":"text","text":")","kind":"operator"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"}","kind":"operator"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"@Annotation","kind":"annotation"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"class","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"Annotated","kind":"class"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"inner","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"class","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"Inner","kind":"class"},{"type":"text","text":"<","kind":"operator"},{"type":"text","text":"in","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"T","kind":"ident"},{"type":"text","text":",","kind":"operator"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"out","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"E","kind":"ident"},{"type":"text","text":">","kind":"operator"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"object","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"O","kind":"ident"},{"type":"text","text":"\n\n ","kind":"space"},{"type":"text","text":"companion","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"object","kind":"keyword"},{"type":"text","text":" ","kind":"space"},{"type":"text","text":"{","kind":"operator"},{"type":"text","text":"\n ","kind":"space"},{"type":"text","text":"}","kind":"operator"},{"type":"text","text":"\n","kind":"space"},{"type":"text","text":"}","kind":"operator"}] diff --git a/test/executable/suite.rb b/test/executable/suite.rb index f490a497..bf54cf23 100644 --- a/test/executable/suite.rb +++ b/test/executable/suite.rb @@ -194,18 +194,17 @@ def coderay args, options = {} end end - context 'highlighting a file without explicit input type (source.kt)' do + context 'Kotlin smoke test' do source_file = ROOT_DIR + 'test/executable/source.kt' - command = "#{source_file} -html" - - source = File.read source_file - - pre = %r{
(.*?)
}m - tag_class = /]*>/ should 'generate json' do - target = coderay("#{source_file} #{source_file}.json") - target = coderay("#{source_file} #{source_file}.html") + coderay("#{source_file} #{source_file}.actual.json") + # coderay("#{source_file} #{source_file}.actual.html") + + result = JSON.parse(File.read ("#{source_file}.actual.json")) + expected = JSON.parse(File.read ("#{source_file}.expected.json")) + + assert_equal expected, result end # should 'respect the file extension and highlight the input as Kotlin' do From 356644ed055063de44fe5edb7abddfbe95782c2c Mon Sep 17 00:00:00 2001 From: Sergey Mashkov Date: Mon, 7 Aug 2017 00:26:57 +0300 Subject: [PATCH 05/11] Kotlin: fix regexp escapes --- lib/coderay/scanners/kotlin.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index 40cd3740..a0215e06 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -88,11 +88,11 @@ def scan_tokens encoder, options encoder.text_token match, kind elsif (match = scan(/ \.(?!\d) | [,?:()\[\]] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? /x)) encoder.text_token match, :operator - elsif (match = scan(/{/)) + elsif (match = scan(/\{/)) class_name_follows = false encoder.text_token match, :operator states << :initial - elsif (match = scan(/}/)) + elsif (match = scan(/\}/)) encoder.text_token match, :operator unless states.empty? @@ -131,7 +131,7 @@ def scan_tokens encoder, options end when :string - if (match = scan(/\${/)) + if (match = scan(/\$\{/)) encoder.text_token match, :operator state = :initial @@ -163,7 +163,7 @@ def scan_tokens encoder, options raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end when :multiline_string - if (match = scan(/\${/)) + if (match = scan(/\$\{/)) encoder.text_token match, :operator state = :initial From a88b0a9f7921b6edc5cdf76d5f89d3ac6ced2516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cmikrethor=E2=80=9D?= <“mikrethor@gmail.com”> Date: Tue, 21 Dec 2021 11:29:23 -0500 Subject: [PATCH 06/11] Formating for to comply with rubocop --- lib/coderay/scanners/kotlin.rb | 257 ++++++++++++++++----------------- 1 file changed, 128 insertions(+), 129 deletions(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index a0215e06..9d8211e5 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -25,20 +25,19 @@ class interface object fun init get set inner ] - TYPES = %w[ + TYPES = %w[ Boolean Byte Char class Double Float Int Long Short Unit Nothing Any ] STRING_CONTENT_PATTERN = { - "'" => /[^\\'$]+/, - '"' => /[^\\"$]+/, - } # :nodoc:s + "'" => /[^\\'$]+/, + '"' => /[^\\"$]+/, + } # :nodoc:s IDENT_KIND = Java::IDENT_KIND.dup. - add(TYPES, :type). - add(KOTLIN_KEYWORDS, :keyword). - add(KOTLIN_MODIFIERS, :keyword) # :nodoc: - + add(TYPES, :type). + add(KOTLIN_KEYWORDS, :keyword). + add(KOTLIN_MODIFIERS, :keyword) # :nodoc: def setup @state = :initial @@ -56,137 +55,137 @@ def scan_tokens encoder, options case state - when :initial - if (match = scan(/ \s+ | \\\n /x)) - encoder.text_token match, :space - next - elsif (match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)) - encoder.text_token match, :comment - next - elsif (match = scan(/ TODO \( /ox)) - encoder.text_token "TODO", :comment - encoder.text_token "(", :operator - elsif (match = scan(/ #{IDENT} /ox)) - kind = IDENT_KIND[match] - if last_token_dot - kind = :ident - elsif class_name_follows - kind = :class - class_name_follows = false - else - case match - when 'import' - package_name_expected = :include - when 'package' - package_name_expected = :namespace - when 'class', 'interface' - class_name_follows = true - else - # nothing - end - end - encoder.text_token match, kind - elsif (match = scan(/ \.(?!\d) | [,?:()\[\]] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? /x)) - encoder.text_token match, :operator - elsif (match = scan(/\{/)) + when :initial + if (match = scan(/ \s+ | \\\n /x)) + encoder.text_token match, :space + next + elsif (match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)) + encoder.text_token match, :comment + next + elsif (match = scan(/ TODO \( /ox)) + encoder.text_token "TODO", :comment + encoder.text_token "(", :operator + elsif (match = scan(/ #{IDENT} /ox)) + kind = IDENT_KIND[match] + if last_token_dot + kind = :ident + elsif class_name_follows + kind = :class class_name_follows = false - encoder.text_token match, :operator - states << :initial - elsif (match = scan(/\}/)) - encoder.text_token match, :operator - - unless states.empty? - state = states.pop - - if state == :string || state == :multiline_string - string_delimiter = delimiters.pop - encoder.end_group :initial - end + else + case match + when 'import' + package_name_expected = :include + when 'package' + package_name_expected = :namespace + when 'class', 'interface' + class_name_follows = true + else + # nothing end - elsif (match = scan(/"""/)) - state = :multiline_string - encoder.begin_group :string - encoder.text_token match, :delimiter - elsif (match = scan(/["']/)) - state = :string - encoder.begin_group state - string_delimiter = match - encoder.text_token match, :delimiter - elsif check(/[\d.]/) - if (match = scan(/0[xX][0-9A-Fa-f]+/)) - encoder.text_token match, :hex - elsif (match = scan(/(?>0[0-7]+)(?![89.eEfF])/)) - encoder.text_token match, :octal - elsif (match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)) - encoder.text_token match, :float - elsif (match = scan(/\d+[lL]?/)) - encoder.text_token match, :integer + end + encoder.text_token match, kind + elsif (match = scan(/ \.(?!\d) | [,?:()\[\]] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? /x)) + encoder.text_token match, :operator + elsif (match = scan(/\{/)) + class_name_follows = false + encoder.text_token match, :operator + states << :initial + elsif (match = scan(/\}/)) + encoder.text_token match, :operator + + unless states.empty? + state = states.pop + + if state == :string || state == :multiline_string + string_delimiter = delimiters.pop + encoder.end_group :initial end - - elsif (match = scan(/ @ #{IDENT} /ox)) - encoder.text_token match, :annotation - - else - encoder.text_token getch, :error + end + elsif (match = scan(/"""/)) + state = :multiline_string + encoder.begin_group :string + encoder.text_token match, :delimiter + elsif (match = scan(/["']/)) + state = :string + encoder.begin_group state + string_delimiter = match + encoder.text_token match, :delimiter + elsif check(/[\d.]/) + if (match = scan(/0[xX][0-9A-Fa-f]+/)) + encoder.text_token match, :hex + elsif (match = scan(/(?>0[0-7]+)(?![89.eEfF])/)) + encoder.text_token match, :octal + elsif (match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)) + encoder.text_token match, :float + elsif (match = scan(/\d+[lL]?/)) + encoder.text_token match, :integer end - when :string - if (match = scan(/\$\{/)) - encoder.text_token match, :operator - - state = :initial - encoder.begin_group state + elsif (match = scan(/ @ #{IDENT} /ox)) + encoder.text_token match, :annotation - delimiters << string_delimiter - states << :string - string_delimiter = nil - elsif (match = scan(/ \$ #{IDENT} /ox)) - encoder.text_token match, :ident - elsif (match = scan(STRING_CONTENT_PATTERN[string_delimiter])) - encoder.text_token match, :content - elsif (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) - if string_delimiter == "'" && !(match == "\\\\" || match == "\\'") - encoder.text_token match, :content - else - encoder.text_token match, :char - end - elsif (match = scan(/["']/)) - encoder.text_token match, :delimiter - encoder.end_group state - state = :initial - string_delimiter = nil - elsif (match = scan(/ \\ | $ /x)) - encoder.end_group state - state = :initial - encoder.text_token match, :error unless match.empty? - else - raise_inspect "else case \" reached; %p not handled." % peek(1), encoder - end - when :multiline_string - if (match = scan(/\$\{/)) - encoder.text_token match, :operator - - state = :initial - encoder.begin_group state - - delimiters << nil - states << :multiline_string - elsif (match = scan(/ \$ #{IDENT} /ox)) - encoder.text_token match, :ident - elsif (match = scan(/ [^$\\"]+ /x)) - encoder.text_token match, :content - elsif (match = scan(/"""/x)) - encoder.text_token match, :delimiter - encoder.end_group :string - state = :initial - string_delimiter = nil - elsif (match = scan(/"/)) + else + encoder.text_token getch, :error + end + + when :string + if (match = scan(/\$\{/)) + encoder.text_token match, :operator + + state = :initial + encoder.begin_group state + + delimiters << string_delimiter + states << :string + string_delimiter = nil + elsif (match = scan(/ \$ #{IDENT} /ox)) + encoder.text_token match, :ident + elsif (match = scan(STRING_CONTENT_PATTERN[string_delimiter])) + encoder.text_token match, :content + elsif (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) + if string_delimiter == "'" && !(match == "\\\\" || match == "\\'") encoder.text_token match, :content else - raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + encoder.text_token match, :char end + elsif (match = scan(/["']/)) + encoder.text_token match, :delimiter + encoder.end_group state + state = :initial + string_delimiter = nil + elsif (match = scan(/ \\ | $ /x)) + encoder.end_group state + state = :initial + encoder.text_token match, :error unless match.empty? + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + when :multiline_string + if (match = scan(/\$\{/)) + encoder.text_token match, :operator + + state = :initial + encoder.begin_group state + + delimiters << nil + states << :multiline_string + elsif (match = scan(/ \$ #{IDENT} /ox)) + encoder.text_token match, :ident + elsif (match = scan(/ [^$\\"]+ /x)) + encoder.text_token match, :content + elsif (match = scan(/"""/x)) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + string_delimiter = nil + elsif (match = scan(/"/)) + encoder.text_token match, :content else - raise_inspect 'Unknown state', encoder + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + else + raise_inspect 'Unknown state', encoder end end From 04aeafa671cfddcc9ff1a5cbc34ce0c731dbe435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cmikrethor=E2=80=9D?= <“mikrethor@gmail.com”> Date: Tue, 21 Dec 2021 12:41:48 -0500 Subject: [PATCH 07/11] Indent --- lib/coderay/scanners/kotlin.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index 9d8211e5..a5a95dd3 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -15,7 +15,7 @@ class Kotlin < Java class interface object fun init get set in out if when else for while do return break continue - ] + ] KOTLIN_MODIFIERS = %w[ annotation enum data sealed companion @@ -23,15 +23,15 @@ class interface object fun init get set public protected private internal inline suspend inner - ] + ] TYPES = %w[ - Boolean Byte Char class Double Float Int Long Short Unit Nothing Any - ] + Boolean Byte Char class Double Float Int Long Short Unit Nothing Any + ] STRING_CONTENT_PATTERN = { "'" => /[^\\'$]+/, - '"' => /[^\\"$]+/, + '"' => /[^\\"$]+/ } # :nodoc:s IDENT_KIND = Java::IDENT_KIND.dup. From 26092c379f21625d0d17b7914a2aa70c40f721e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cmikrethor=E2=80=9D?= <“mikrethor@gmail.com”> Date: Tue, 21 Dec 2021 14:50:39 -0500 Subject: [PATCH 08/11] include? --- lib/coderay/scanners/kotlin.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index a5a95dd3..5750f136 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -75,9 +75,9 @@ def scan_tokens encoder, options else case match when 'import' - package_name_expected = :include + :include when 'package' - package_name_expected = :namespace + :namespace when 'class', 'interface' class_name_follows = true else @@ -97,7 +97,7 @@ def scan_tokens encoder, options unless states.empty? state = states.pop - if state == :string || state == :multiline_string + if [:multiline_string, :string].include? state string_delimiter = delimiters.pop encoder.end_group :initial end @@ -144,7 +144,7 @@ def scan_tokens encoder, options elsif (match = scan(STRING_CONTENT_PATTERN[string_delimiter])) encoder.text_token match, :content elsif (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) - if string_delimiter == "'" && !(match == "\\\\" || match == "\\'") + if string_delimiter == "'" && !(%W[\\\\ \\'].include? match) encoder.text_token match, :content else encoder.text_token match, :char From 4ea6ae22db61656ec0e089bd5792a37eb841d43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cmikrethor=E2=80=9D?= <“mikrethor@gmail.com”> Date: Tue, 21 Dec 2021 14:53:58 -0500 Subject: [PATCH 09/11] redundant else --- lib/coderay/scanners/kotlin.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index 5750f136..b472bb25 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -1,10 +1,8 @@ module CodeRay module Scanners - load :java class Kotlin < Java - register_for :kotlin file_extension 'kt' @@ -81,7 +79,6 @@ def scan_tokens encoder, options when 'class', 'interface' class_name_follows = true else - # nothing end end encoder.text_token match, kind From b82b9e4e450afbd051d3f01aea2c9d56e72a1f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cmikrethor=E2=80=9D?= <“mikrethor@gmail.com”> Date: Tue, 21 Dec 2021 14:54:43 -0500 Subject: [PATCH 10/11] redundant else --- lib/coderay/scanners/kotlin.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index b472bb25..21808e4b 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -71,6 +71,7 @@ def scan_tokens encoder, options kind = :class class_name_follows = false else + #noinspection RubyEmptyElseBlockInspection case match when 'import' :include @@ -78,7 +79,6 @@ def scan_tokens encoder, options :namespace when 'class', 'interface' class_name_follows = true - else end end encoder.text_token match, kind From 1a0c3e4dd0b480ba74f902a0da1fd5a893a4c8c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cmikrethor=E2=80=9D?= <“mikrethor@gmail.com”> Date: Tue, 21 Dec 2021 14:59:57 -0500 Subject: [PATCH 11/11] Missing space after # --- lib/coderay/scanners/kotlin.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/coderay/scanners/kotlin.rb b/lib/coderay/scanners/kotlin.rb index 21808e4b..d2b52a07 100644 --- a/lib/coderay/scanners/kotlin.rb +++ b/lib/coderay/scanners/kotlin.rb @@ -71,7 +71,7 @@ def scan_tokens encoder, options kind = :class class_name_follows = false else - #noinspection RubyEmptyElseBlockInspection + # noinspection RubyEmptyElseBlockInspection case match when 'import' :include