From f4d04110c0ca93619cade4b043a4e9b62103eb53 Mon Sep 17 00:00:00 2001 From: David Varela Date: Thu, 2 Aug 2018 02:50:36 -0700 Subject: [PATCH] Fix parser (lexer) (#553) * Fix parser * Add test for REPL `?` help syntax --- stdlib/Pkg/src/REPLMode.jl | 24 ++++++++++++++---------- stdlib/Pkg/test/repl.jl | 12 +++++++----- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/stdlib/Pkg/src/REPLMode.jl b/stdlib/Pkg/src/REPLMode.jl index 6510d4bf66f95..358e10038b4fe 100644 --- a/stdlib/Pkg/src/REPLMode.jl +++ b/stdlib/Pkg/src/REPLMode.jl @@ -200,7 +200,7 @@ function parse(cmd::String)::Vector{Statement} # tokenize accoring to whitespace / quotes qwords = parse_quotes(cmd) # tokenzie unquoted tokens according to pkg REPL syntax - words::Vector{String} = collect(Iterators.flatten(map(qword2word, qwords))) + words = lex(qwords) # break up words according to ";"(doing this early makes subsequent processing easier) word_groups = group_words(words) # create statements @@ -210,7 +210,7 @@ end # vector of words -> structured statement # minimal checking is done in this phase -function Statement(words) +function Statement(words)::Statement is_option(word) = first(word) == '-' statement = Statement() @@ -257,10 +257,16 @@ end const lex_re = r"^[\?\./\+\-](?!\-) | ((git|ssh|http(s)?)|(git@[\w\-\.]+))(:(//)?)([\w\.@\:/\-~]+)(\.git)(/)? | [^@\#\s;]+\s*=\s*[^@\#\s;]+ | \#\s*[^@\#\s;]* | @\s*[^@\#\s;]* | [^@\#\s;]+|;"x -function qword2word(qword::QuotedWord) - return qword.isquoted ? [qword.word] : map(m->m.match, eachmatch(lex_re, " $(qword.word)")) - # ^ - # note: space before `$word` is necessary to keep using current `lex_re` +function lex(qwords::Vector{QuotedWord})::Vector{String} + words = String[] + for qword in qwords + if qword.isquoted + push!(words, qword.word) + else + append!(words, map(m->m.match, eachmatch(lex_re, qword.word))) + end + end + return words end function parse_quotes(cmd::String)::Vector{QuotedWord} @@ -279,18 +285,16 @@ function parse_quotes(cmd::String)::Vector{QuotedWord} if in_singlequote # raw char push!(token_in_progress, c) else # delimiter + in_doublequote ? push_token!(true) : push_token!(false) in_doublequote = !in_doublequote - push_token!(true) end elseif c == '\'' if in_doublequote # raw char push!(token_in_progress, c) else # delimiter + in_singlequote ? push_token!(true) : push_token!(false) in_singlequote = !in_singlequote - push_token!(true) end - elseif c == ' ' && !(in_doublequote || in_singlequote) - push_token!(false) else push!(token_in_progress, c) end diff --git a/stdlib/Pkg/test/repl.jl b/stdlib/Pkg/test/repl.jl index 9486804e9f97c..6dc3e00fe7058 100644 --- a/stdlib/Pkg/test/repl.jl +++ b/stdlib/Pkg/test/repl.jl @@ -72,6 +72,10 @@ temp_pkg_dir() do project_path end @testset "tokens" begin + statement = Pkg.REPLMode.parse("?dev")[1] + @test statement.command.kind == Pkg.REPLMode.CMD_HELP + @test length(statement.arguments) == 1 + @test statement.arguments[1] == "dev" statement = Pkg.REPLMode.parse("add git@github.com:JuliaLang/Example.jl.git")[1] @test "add" in statement.command.names @test statement.arguments[1] == "git@github.com:JuliaLang/Example.jl.git" @@ -844,11 +848,9 @@ end @test qwords[1].isquoted @test qwords[1].word == "Don't" @test !qwords[2].isquoted - @test qwords[2].word == "forget" - @test !qwords[3].isquoted - @test qwords[3].word == "to" - @test qwords[4].isquoted - @test qwords[4].word == "\"test\"" + @test qwords[2].word == " forget to " + @test qwords[3].isquoted + @test qwords[3].word == "\"test\"" @test_throws CommandError Pkg.REPLMode.parse_quotes("Don't") @test_throws CommandError Pkg.REPLMode.parse_quotes("Unterminated \"quot") end