diff --git a/NEWS.md b/NEWS.md
index e5d3619986d08..74232e692af2d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -24,7 +24,7 @@ New library functions
 Standard library changes
 ------------------------
 
-* Cmd interpolation (``` `$(x::Cmd) a b c` ``` where) now propagates `x`'s process flags (environment, flags, working directory, etc) if `x` is the first interpolant and errors otherwise ([#24353]).
+* `Regex` can now be multiplied (`*`) and exponentiated (`^`), like strings ([#23422]).
 
 #### LinearAlgebra
 
diff --git a/base/regex.jl b/base/regex.jl
index 3bee6dbd64947..084328a4a80f0 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -517,3 +517,121 @@ function hash(r::Regex, h::UInt)
     h = hash(r.compile_options, h)
     h = hash(r.match_options, h)
 end
+
+## String operations ##
+
+"""
+    *(s::Regex, t::Union{Regex,AbstractString,AbstractChar}) -> Regex
+    *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex) -> Regex
+
+Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref).
+String and character arguments must be matched exactly in the resulting regex,
+meaning that the contained characters are devoid of any special meaning
+(they are quoted with "\\Q" and "\\E").
+
+The `i`, `m`, `s` and `x` flags may differ between the regex arguments: flags set on
+every regex argument are kept on the result, and the others are applied locally to the
+sub-pattern they came from. An `ArgumentError` is thrown if the regex arguments
+disagree on any other compile option or on their match options.
+
+!!! compat "Julia 1.3"
+    This method requires at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> match(r"Hello|Good bye" * ' ' * "world", "Hello world")
+RegexMatch("Hello world")
+
+julia> r = r"a|b" * "c|d"
+r"(?:a|b)\\Qc|d\\E"
+
+julia> match(r, "ac") === nothing
+true
+
+julia> match(r, "ac|d")
+RegexMatch("ac|d")
+```
+"""
+function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,AbstractString,AbstractChar}...)
+    mask = PCRE.CASELESS | PCRE.MULTILINE | PCRE.DOTALL | PCRE.EXTENDED # imsx
+    match_opts = nothing # all args must agree on this
+    compile_opts = nothing # all args must agree on this
+    shared = mask # imsx flags set on every Regex argument (narrowed in the loop)
+    for r in (r1, rs...)
+        r isa Regex || continue
+        if match_opts === nothing
+            match_opts = r.match_options
+            compile_opts = r.compile_options & ~mask
+        else
+            r.match_options == match_opts &&
+                r.compile_options & ~mask == compile_opts ||
+                throw(ArgumentError("cannot multiply regexes: incompatible options"))
+        end
+        shared &= r.compile_options
+    end
+    unshared = mask & ~shared # imsx flags that must be re-applied per sub-pattern
+    Regex(string(wrap_string(r1, unshared), wrap_string.(rs, Ref(unshared))...), compile_opts | shared, match_opts)
+end
+
+*(r::Regex) = r # avoids wrapping r in a useless subpattern
+
+# Wrap one operand of `*` for concatenation: a Regex becomes a non-capturing group
+# carrying its own `unshared` imsx flags; strings and characters are quoted literally.
+wrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')')
+# if s contains raw"\E", split '\' and 'E' within two distinct \Q...\E groups:
+wrap_string(s::AbstractString, ::UInt32) = string("\\Q", replace(s, raw"\E" => raw"\\E\QE"), "\\E")
+wrap_string(s::AbstractChar, ::UInt32) = string("\\Q", s, "\\E")
+
+# Flag letters (a subset of "imsx") for the given combination of compile options;
+# the lookup table is built on first use.
+regex_opts_str(opts) = (isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex())[opts]
+
+# UInt32 to String mapping for some compile options
+const _regex_opts_str = Ref{ImmutableDict{UInt32,String}}()
+
+# Fill `_regex_opts_str` with all 16 combinations of the imsx flags and return the table.
+init_regex() = _regex_opts_str[] = foldl(0:15, init=ImmutableDict{UInt32,String}()) do d, o
+    opt = UInt32(0)
+    str = ""
+    if o & 1 != 0
+        opt |= PCRE.CASELESS
+        str *= 'i'
+    end
+    if o & 2 != 0
+        opt |= PCRE.MULTILINE
+        str *= 'm'
+    end
+    if o & 4 != 0
+        opt |= PCRE.DOTALL
+        str *= 's'
+    end
+    if o & 8 != 0
+        opt |= PCRE.EXTENDED
+        str *= 'x'
+    end
+    ImmutableDict(d, opt => str)
+end
+
+
+"""
+    ^(s::Regex, n::Integer)
+
+Repeat a regex `n` times. An `ArgumentError` is thrown if `n` is negative.
+
+!!! compat "Julia 1.3"
+    This method requires at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> r"Test "^2
+r"(?:Test ){2}"
+
+julia> match(r"Test "^2, "Test Test ")
+RegexMatch("Test Test ")
+```
+"""
+function ^(r::Regex, i::Integer)
+    # PCRE would read "{-1}" as literal text rather than as a quantifier
+    i < 0 && throw(ArgumentError("cannot repeat a regex $i times"))
+    Regex(string("(?:", r.pattern, "){$i}"), r.compile_options, r.match_options)
+end
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index f119ab3f8d7c4..0fc71c88d285d 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -925,6 +925,7 @@ function setup_interface(
             oldpos = firstindex(input)
             firstline = true
             isprompt_paste = false
+            jl_prompt_len = 7 # "julia> "
             while oldpos <= lastindex(input) # loop until all lines have been executed
                 if JL_PROMPT_PASTE[]
                     # Check if the next statement starts with "julia> ", in that case
@@ -934,7 +935,6 @@ function setup_interface(
                         oldpos >= sizeof(input) && return
                     end
                     # Check if input line starts with "julia> ", remove it if we are in prompt paste mode
-                    jl_prompt_len = 7
                     if (firstline || isprompt_paste) && startswith(SubString(input, oldpos), JULIA_PROMPT)
                         isprompt_paste = true
                         oldpos += jl_prompt_len
@@ -959,7 +959,7 @@ function setup_interface(
                         tail = lstrip(tail)
                     end
                     if isprompt_paste # remove indentation spaces corresponding to the prompt
-                        tail = replace(tail, r"^ {7}"m => "") # 7: jl_prompt_len
+                        tail = replace(tail, r"^"m * ' '^jl_prompt_len => "")
                     end
                     LineEdit.replace_line(s, tail, true)
                     LineEdit.refresh_line(s)
@@ -969,7 +969,7 @@ function setup_interface(
                 line = strip(input[oldpos:prevind(input, pos)])
                 if !isempty(line)
                     if isprompt_paste # remove indentation spaces corresponding to the prompt
-                        line = replace(line, r"^ {7}"m => "") # 7: jl_prompt_len
+                        line = replace(line, r"^"m * ' '^jl_prompt_len => "")
                     end
                     # put the line on the screen and history
                     LineEdit.replace_line(s, line)
diff --git a/test/regex.jl b/test/regex.jl
index cb3fa965f8a50..76c36b76edf84 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -78,6 +78,47 @@
     @test !endswith("abc", r"C")
     @test endswith("abc", r"C"i)
 
+    @testset "multiplication & exponentiation" begin
+        @test *(r"a") == r"a"
+
+        @test r"a" * r"b" == r"(?:a)(?:b)"
+        @test r"a" * "b" == r"(?:a)\Qb\E"
+        @test r"a" * 'b' == r"(?:a)\Qb\E"
+        @test "a" * r"b" == r"\Qa\E(?:b)"
+        @test 'a' * r"b" == r"\Qa\E(?:b)"
+        for a = (r"a", "a", 'a'),
+            b = (r"b", "b", 'b'),
+            c = (r"c", "c", 'c')
+            a isa Regex || b isa Regex || c isa Regex || continue
+            @test match(a * b * c, "abc") !== nothing
+        end
+        for s = ["thiscat", "thishat", "thatcat", "thathat"]
+            @test match(r"this|that" * r"cat|hat", s) !== nothing
+        end
+
+        @test r"a"i * r"b"i == r"(?:a)(?:b)"i
+        @test r"a"i * "b" == r"(?:a)\Qb\E"i
+        @test r"a"i * 'b' == r"(?:a)\Qb\E"i
+        @test "a" * r"b"i == r"\Qa\E(?:b)"i
+        @test 'a' * r"b"i == r"\Qa\E(?:b)"i
+
+        @test r"a"i * r"b"m == r"(?i:a)(?m:b)"
+        @test r"a"im * r"b"m == r"(?i:a)(?:b)"m
+        @test r"a"im * r"b"im == r"(?:a)(?:b)"im
+        @test r"a"im * r"b"i == r"(?m:a)(?:b)"i
+
+        r = r"" * raw"a\Eb|c"
+        @test match(r, raw"a\Eb|c").match == raw"a\Eb|c"
+        @test match(r, raw"c") === nothing
+
+        # error for really incompatible options
+        @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS)
+        @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS, Base.DEFAULT_MATCH_OPTS & ~Base.PCRE.NO_UTF_CHECK)
+
+        @test r"this|that"^2 == r"(?:this|that){2}"
+        @test_throws ArgumentError r"a"^(-1)
+    end
+
     # Test that PCRE throws the correct kind of error
     # TODO: Uncomment this once the corresponding change has propagated to CI
     #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)