forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
implement replace on String for multiple patterns (JuliaLang#40484)
This has been attempted before, sometimes fairly similar to this, but the attempts seemed to be either too simple or too complicated. This aims to be simple, and even beats one of the "handwritten" benchmark cases. Past issues (e.g. JuliaLang#25396) have proposed that using Regex may be faster, but in my tests, this handily bests even simplified regexes. There can be slow Regexes patterns that can cause this to exhibit O(n^2) behavior, but only if the one of the earlier patterns is a partial match for a later pattern Regex and that Regex always matches O(n) of the input stream. This is a case that is hopefully usually avoidable in practice. fixes JuliaLang#35327 fixes JuliaLang#39061 fixes JuliaLang#35414 fixes JuliaLang#29849 fixes JuliaLang#30457 fixes JuliaLang#25396
- Loading branch information
Showing
5 changed files
with
245 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -312,6 +312,178 @@ end | |
|
||
end | ||
|
||
@testset "replace many" begin | ||
# PR 35414 Francesco Alemanno <[email protected]> | ||
@test replace("foobarbaz", "oo" => "zz", "ar" => "zz", "z" => "m") == "fzzbzzbam" | ||
substmp=["z" => "m", "oo" => "zz", "ar" => "zz"] | ||
for perm in [[1, 2, 3], [2, 1, 3], [3, 2, 1], [2, 3, 1], [1, 3, 2], [3, 1, 2]] | ||
@test replace("foobarbaz", substmp[perm]...) == "fzzbzzbam" | ||
@test replace("foobarbaz", substmp[perm]..., count=2) == "fzzbzzbaz" | ||
@test replace("foobarbaz", substmp[perm]..., count=1) == "fzzbarbaz" | ||
end | ||
@test replace("foobarbaz", "z" => "m", r"a.*a" => uppercase) == "foobARBAm" | ||
@test replace("foobarbaz", 'o' => 'z', 'a' => 'q', 'z' => 'm') == "fzzbqrbqm" | ||
|
||
|
||
# PR #25732 Klaus Crusius <[email protected]> | ||
@test replace("\u2202", '*' => '\0', "" => "") == "\u2202" | ||
|
||
@test replace("foobar", 'o' => '0', "" => "") == "f00bar" | ||
@test replace("foobar", 'o' => '0', count=1, "" => "") == "foobar" | ||
@test replace("foobar", 'o' => '0', count=2, "" => "") == "f0obar" | ||
@test replace("foobar", 'o' => "", "" => "") == "fbar" | ||
@test replace("foobar", 'o' => "", count=1, "" => "") == "foobar" | ||
@test replace("foobar", 'o' => "", count=2, "" => "") == "fobar" | ||
@test replace("foobar", 'f' => 'F', "" => "") == "Foobar" | ||
@test replace("foobar", 'r' => 'R', "" => "") == "foobaR" | ||
|
||
@test replace("foofoofoo", "foo" => "bar", "" => "") == "barbarbar" | ||
@test replace("foobarfoo", "foo" => "baz", "" => "") == "bazbarbaz" | ||
@test replace("barfoofoo", "foo" => "baz", "" => "") == "barbazbaz" | ||
|
||
@test replace("", "" => "", "" => "") == "" | ||
@test replace("", "" => "x", "" => "") == "x" | ||
@test replace("", "x" => "y", "" => "") == "" | ||
|
||
@test replace("abcd", "" => "^", "" => "") == "^a^b^c^d^" | ||
@test replace("abcd", "b" => "^", "" => "") == "a^cd" | ||
@test replace("abcd", r"b?" => "^", "" => "") == "^a^c^d^" | ||
@test replace("abcd", r"b+" => "^", "" => "") == "a^cd" | ||
@test replace("abcd", r"b?c?" => "^", "" => "") == "^a^d^" | ||
@test replace("abcd", r"[bc]?" => "^", "" => "") == "^a^^d^" | ||
|
||
@test replace("foobarfoo", r"(fo|ba)" => "xx", "" => "") == "xxoxxrxxo" | ||
@test replace("foobarfoo", r"(foo|ba)" => "bar", "" => "") == "barbarrbar" | ||
|
||
@test replace("foobar", 'o' => 'ø', "" => "") == "føøbar" | ||
@test replace("foobar", 'o' => 'ø', count=2, "" => "") == "føobar" | ||
@test replace("føøbar", 'ø' => 'o', "" => "") == "foobar" | ||
@test replace("føøbar", 'ø' => 'o', count=2, "" => "") == "foøbar" | ||
@test replace("føøbar", 'ø' => 'ö', "" => "") == "fööbar" | ||
@test replace("føøbar", 'ø' => 'ö', count=2, "" => "") == "föøbar" | ||
@test replace("føøbar", 'ø' => "", "" => "") == "fbar" | ||
@test replace("føøbar", 'ø' => "", count=2, "" => "") == "føbar" | ||
@test replace("føøbar", 'f' => 'F', "" => "") == "Føøbar" | ||
@test replace("ḟøøbar", 'ḟ' => 'F', "" => "") == "Føøbar" | ||
@test replace("føøbar", 'f' => 'Ḟ', "" => "") == "Ḟøøbar" | ||
@test replace("ḟøøbar", 'ḟ' => 'Ḟ', "" => "") == "Ḟøøbar" | ||
@test replace("føøbar", 'r' => 'R', "" => "") == "føøbaR" | ||
@test replace("føøbaṙ", 'ṙ' => 'R', "" => "") == "føøbaR" | ||
@test replace("føøbar", 'r' => 'Ṙ', "" => "") == "føøbaṘ" | ||
@test replace("føøbaṙ", 'ṙ' => 'Ṙ', "" => "") == "føøbaṘ" | ||
|
||
@test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", "" => "") == "barbarbar" | ||
@test replace("ḟøøbarḟøø", "ḟøø" => "baz", "" => "") == "bazbarbaz" | ||
@test replace("barḟøøḟøø", "ḟøø" => "baz", "" => "") == "barbazbaz" | ||
|
||
@test replace("foofoofoo", "foo" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ" | ||
@test replace("fooƀäṙfoo", "foo" => "baz", "" => "") == "bazƀäṙbaz" | ||
@test replace("ƀäṙfoofoo", "foo" => "baz", "" => "") == "ƀäṙbazbaz" | ||
|
||
@test replace("foofoofoo", "foo" => "bar", "" => "") == "barbarbar" | ||
@test replace("foobarfoo", "foo" => "ƀäż", "" => "") == "ƀäżbarƀäż" | ||
@test replace("barfoofoo", "foo" => "ƀäż", "" => "") == "barƀäżƀäż" | ||
|
||
@test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ" | ||
@test replace("ḟøøƀäṙḟøø", "ḟøø" => "baz", "" => "") == "bazƀäṙbaz" | ||
@test replace("ƀäṙḟøøḟøø", "ḟøø" => "baz", "" => "") == "ƀäṙbazbaz" | ||
|
||
@test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", "" => "") == "barbarbar" | ||
@test replace("ḟøøbarḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäżbarƀäż" | ||
@test replace("barḟøøḟøø", "ḟøø" => "ƀäż", "" => "") == "barƀäżƀäż" | ||
|
||
@test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ" | ||
@test replace("ḟøøƀäṙḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäżƀäṙƀäż" | ||
@test replace("ƀäṙḟøøḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäṙƀäżƀäż" | ||
|
||
@test replace("", "" => "ẍ", "" => "") == "ẍ" | ||
@test replace("", "ẍ" => "ÿ", "" => "") == "" | ||
|
||
@test replace("äƀçđ", "" => "π", "" => "") == "πäπƀπçπđπ" | ||
@test replace("äƀçđ", "ƀ" => "π", "" => "") == "äπçđ" | ||
@test replace("äƀçđ", r"ƀ?" => "π", "" => "") == "πäπçπđπ" | ||
@test replace("äƀçđ", r"ƀ+" => "π", "" => "") == "äπçđ" | ||
@test replace("äƀçđ", r"ƀ?ç?" => "π", "" => "") == "πäπđπ" | ||
@test replace("äƀçđ", r"[ƀç]?" => "π", "" => "") == "πäππđπ" | ||
|
||
@test replace("foobarfoo", r"(fo|ba)" => "ẍẍ", "" => "") == "ẍẍoẍẍrẍẍo" | ||
|
||
@test replace("ḟøøbarḟøø", r"(ḟø|ba)" => "xx", "" => "") == "xxøxxrxxø" | ||
@test replace("ḟøøbarḟøø", r"(ḟøø|ba)" => "bar", "" => "") == "barbarrbar" | ||
|
||
@test replace("fooƀäṙfoo", r"(fo|ƀä)" => "xx", "" => "") == "xxoxxṙxxo" | ||
@test replace("fooƀäṙfoo", r"(foo|ƀä)" => "ƀäṙ", "" => "") == "ƀäṙƀäṙṙƀäṙ" | ||
|
||
@test replace("ḟøøƀäṙḟøø", r"(ḟø|ƀä)" => "xx", "" => "") == "xxøxxṙxxø" | ||
@test replace("ḟøøƀäṙḟøø", r"(ḟøø|ƀä)" => "ƀäṙ", "" => "") == "ƀäṙƀäṙṙƀäṙ" | ||
|
||
@test replace("foo", "oo" => uppercase, "" => "") == "fOO" | ||
|
||
# Issue 13332 | ||
@test replace("abc", 'b' => 2.1, "" => "") == "a2.1c" | ||
|
||
# test replace with a count for String and GenericString | ||
# check that replace is a no-op if count==0 | ||
for s in ["aaa", Test.GenericString("aaa")] | ||
@test_throws DomainError replace(s, 'a' => "", count = -1, "" => "") | ||
@test replace(s, 'a' => 'z', count=0, "" => "")::String == s | ||
@test replace(s, 'a' => 'z', count=1, "" => "") == "zaa" | ||
@test replace(s, 'a' => 'z', count=2, "" => "") == "zza" | ||
@test replace(s, 'a' => 'z', count=3, "" => "") == "zzz" | ||
@test replace(s, 'a' => 'z', count=4, "" => "") == "zzz" | ||
@test replace(s, 'a' => 'z', count=typemax(Int), "" => "") == "zzz" | ||
@test replace(s, 'a' => 'z', "" => "") == "zzz" | ||
end | ||
|
||
let s = "abc" | ||
@test replace(s) === s | ||
@test replace(s, 'a' => 'z', "" => "") === "zbc" | ||
@test replace(s, 'a' => 'z', 'b' => 'y') == "zyc" | ||
@test replace(s, 'a' => 'z', 'c' => 'x', "b" => 'y') == "zyx" | ||
@test replace(s, '1' => 'z', "" => "") == s | ||
@test replace(s, 'b' => "BbB", "" => "", count=2) == "aBbBc" | ||
end | ||
|
||
let s = "quick quicker quickest" | ||
@test replace(s) === s | ||
@test replace(s, "quickest" => 'z', "quicker" => uppercase, "quick" => 'a') == "a QUICKER z" | ||
@test replace(s, "quick" => 'a', "quicker" => uppercase, "quickest" => 'z') == "a aer aest" | ||
@test replace(s, "quickest" => "lame", "quicker" => "is", "quick" => "Duck", count=2) == "Duck is quickest" | ||
@test "1q1u1i1c1k1 1q1u1i1c1k1e1r1 1q1u1i1c1k1e1s1t1" == | ||
replace(s, "" => '1', "" => "") == | ||
replace(s, "" => '1', "" => '2') | ||
@test replace(s, "qu" => "QU", "qu" => "never happens", "ick" => "") == "QU QUer QUest" | ||
@test replace(s, " " => '_', "r " => "r-") == "quick_quicker-quickest" | ||
@test replace(s, r"[aeiou]" => "ä", "ui" => "ki", "i" => "I") == "qääck qääckär qääckäst" | ||
@test replace(s, "i" => "I", "ui" => "ki", r"[aeiou]" => "ä") == "qkick qkickär qkickäst" | ||
@test replace(s, r"[^ ]+" => "word", "quicker " => "X", count=big"99") == "word word word" | ||
@test replace(s, "quicker " => "X", r"[^ ]+" => "word", count=big"99") == "word Xword" | ||
|
||
@test replace(s, r"(quick)(e)" => s"\2-\1", "x" => "X") == "quick e-quickr e-quickst" | ||
|
||
@test replace(s, 'q' => 'Q', 'u' => 'U') == "QUick QUicker QUickest" | ||
@test replace(s, 'q' => 'Q', r"u" => 'U') == "QUick QUicker QUickest" | ||
@test replace(s, 'q' => 'Q', ==('u') => uppercase) == "QUick QUicker QUickest" | ||
@test replace(s, 'q' => 'Q', islowercase => '-') == "Q---- Q------ Q-------" | ||
@test replace(s, ['q', 'u'] => 'K') == "KKick KKicker KKickest" | ||
@test replace(s, occursin("uq") => 'K') == "KKick KKicker KKickest" | ||
@test replace(s, ==('q') => "B") == "Buick Buicker Buickest" | ||
|
||
@test replace(s, "qui" => "A", 'r' => 'R') == "Ack AckeR Ackest" | ||
@test replace(s, 'r' => 'x', islowercase => uppercase) == "QUICK QUICKEx QUICKEST" | ||
@test replace(s, islowercase => uppercase, 'r' => 'x') == "QUICK QUICKER QUICKEST" | ||
@test replace(s, "q" => "z", islowercase => uppercase, 'r' => 'x') == "zUICK zUICKER zUICKEST" | ||
@test replace(s, "qui" => "A", 'r' => 'x', islowercase => uppercase) == "ACK ACKEx ACKEST" | ||
@test replace(s, "qui" => "A", 'r' => 'x', islowercase => uppercase) == "ACK ACKEx ACKEST" | ||
@test replace(s, r"q" => "z", islowercase => uppercase, 'r' => 'x') == "zUICK zUICKER zUICKEST" | ||
|
||
@test replace(s, "q" => s"a\0b") == "aqbuick aqbuicker aqbuickest" | ||
@test replace(s, "q" => s"a\0b\n\\\g<0>") == "aqb\n\\quick aqb\n\\quicker aqb\n\\quickest" | ||
@test_throws ErrorException("PCRE error: unknown substring") replace(s, r"q" => s"a\1b") | ||
@test_throws ErrorException("Bad replacement string: pattern is not a Regex") replace(s, "q" => s"a\1b") | ||
end | ||
end | ||
|
||
@testset "chomp/chop" begin | ||
@test chomp("foo\n") == "foo" | ||
@test chomp("fo∀\n") == "fo∀" | ||
|