diff --git a/base/utf16.jl b/base/utf16.jl
index 8caa3fd14f19d..e2b9a6aa228b6 100644
--- a/base/utf16.jl
+++ b/base/utf16.jl
@@ -143,3 +143,40 @@ function utf16(p::Union{Ptr{UInt16}, Ptr{Int16}})
     while unsafe_load(p, len+1) != 0; len += 1; end
     utf16(p, len)
 end
+
+# Apply `fun` to each character of `str` and encode the results as UTF-16, so
+# that mapping over a UTF16String produces a UTF16String.
+#
+# `fun` must return a Char for every character: any other return type throws
+# UnicodeError(UTF_ERR_MAP_CHAR, 0, 0).  A returned character that is a
+# surrogate code point, or that is greater than 0x10ffff, throws
+# UnicodeError(UTF_ERR_INVALID_CHAR, 0, uc) where uc is the offending value.
+function map(fun, str::UTF16String)
+    buf = UInt16[]
+    # Capacity hint only; the result may need more or fewer code units.
+    sizehint!(buf, length(str.data))
+    for ch in str
+        c2 = fun(ch)
+        isa(c2, Char) || throw(UnicodeError(UTF_ERR_MAP_CHAR, 0, 0))
+        uc = reinterpret(UInt32, c2)
+        if uc < 0x10000
+            # Fits in one code unit.  Narrow once and hand the UInt16 code unit
+            # to the surrogate check (NOTE(review): utf16_is_surrogate is defined
+            # outside this hunk; presumably it takes a UInt16 -- confirm).
+            cu = UInt16(uc)
+            utf16_is_surrogate(cu) &&
+                throw(UnicodeError(UTF_ERR_INVALID_CHAR, 0, uc))
+            push!(buf, cu)
+        elseif uc <= 0x10ffff
+            # Surrogate pair.  0xd7c0 == 0xd800 - (0x10000 >> 10), i.e. the
+            # 0x10000 offset is folded into the lead-surrogate base.
+            push!(buf, UInt16(0xd7c0 + (uc >> 10)))
+            push!(buf, UInt16(0xdc00 + (uc & 0x3ff)))
+        else
+            throw(UnicodeError(UTF_ERR_INVALID_CHAR, 0, uc))
+        end
+    end
+    # The data vector handed to UTF16String ends with a 0 code unit.
+    push!(buf, 0)
+    UTF16String(buf)
+end
diff --git a/test/strings.jl b/test/strings.jl
index 3701567bae68e..c4b9241415a21 100644
--- a/test/strings.jl
+++ b/test/strings.jl
@@ -1897,3 +1897,26 @@ end
 @test [c for c in "ḟøøƀäṙ"] == ['ḟ', 'ø', 'ø', 'ƀ', 'ä', 'ṙ']
 @test [i for i in eachindex("ḟøøƀäṙ")] == [1, 4, 6, 8, 10, 12]
 @test [x for x in enumerate("ḟøøƀäṙ")] == [(1, 'ḟ'), (2, 'ø'), (3, 'ø'), (4, 'ƀ'), (5, 'ä'), (6, 'ṙ')]
+
+# issue #11464: uppercase/lowercase of a UTF16String used to become a UTF8String
+@test typeof(uppercase("abcdef")) == ASCIIString
+@test typeof(uppercase(utf8("abcdef"))) == UTF8String
+@test typeof(uppercase(utf16("abcdef"))) == UTF16String
+@test typeof(uppercase(utf32("abcdef"))) == UTF32String
+@test typeof(lowercase("ABCDEF")) == ASCIIString
+@test typeof(lowercase(utf8("ABCDEF"))) == UTF8String
+@test typeof(lowercase(utf16("ABCDEF"))) == UTF16String
+@test typeof(lowercase(utf32("ABCDEF"))) == UTF32String
+
+# Mapping functions whose results map(fun, ::UTF16String) must reject
+foomap(ch) = (ch > 65)          # returns a Bool, not a Char
+foobar(ch) = Char(0xd800)       # surrogate code point
+foobaz(ch) = Char(0x110000)     # first value past the 0x10ffff upper bound
+@test_throws UnicodeError map(foomap, utf16("abcdef"))
+@test_throws UnicodeError map(foobar, utf16("abcdef"))
+@test_throws UnicodeError map(foobaz, utf16("abcdef"))
+
+# 0x20000 is within range and must be stored as a surrogate pair, not rejected
+foosup(ch) = Char(0x20000)
+@test map(foosup, utf16("a")).data == UInt16[0xd840, 0xdc00, 0x0000]
+