Skip to content

Commit

Permalink
Fix #11460, Fix #11464 UTF16String uppercase/lowercase/map
Browse files Browse the repository at this point in the history
`uppercase`/`lowercase`/`map` on a `UTF16String` now return a `UTF16String`,
consistent with `ASCIIString`, `UTF8String`, and `UTF32String`, which already
return the same string type as their input.
  • Loading branch information
ScottPJones committed Jun 22, 2015
1 parent 1e081b7 commit 7e84bcf
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
23 changes: 23 additions & 0 deletions base/utf16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,26 @@ function utf16(p::Union{Ptr{UInt16}, Ptr{Int16}})
while unsafe_load(p, len+1) != 0; len += 1; end
utf16(p, len)
end

# Apply `fun` to each character of `str` and encode the results as a new
# `UTF16String`, so the output has the same string type as the input.
#
# `fun` must return a `Char` for every character:
#   * any other return type raises `UnicodeError(UTF_ERR_MAP_CHAR, 0, 0)`;
#   * a returned surrogate code point, or a value above 0x10ffff, raises
#     `UnicodeError(UTF_ERR_INVALID_CHAR, 0, uc)` where `uc` is the offending
#     value.
function map(fun, str::UTF16String)
    buf = UInt16[]
    # Capacity hint only: the mapped string may need fewer or more code units
    # than the input's backing array.
    sizehint!(buf, length(str.data))
    for ch in str
        c2 = fun(ch)
        isa(c2, Char) || throw(UnicodeError(UTF_ERR_MAP_CHAR, 0, 0))
        uc = reinterpret(UInt32, c2)
        if uc < 0x10000
            # Narrow to a 16-bit code unit *before* the surrogate check; the
            # conversion is lossless because uc < 0x10000.
            # NOTE(review): utf16_is_surrogate is defined outside this block and
            # presumably takes a UInt16 code unit -- handing it the UInt32 would
            # then raise MethodError instead of the intended UnicodeError.
            unit = UInt16(uc)
            utf16_is_surrogate(unit) &&
                throw(UnicodeError(UTF_ERR_INVALID_CHAR, 0, uc))
            push!(buf, unit)
        elseif uc <= 0x10ffff
            # Two code units: 0xd7c0 + (uc >> 10) equals
            # 0xd800 + ((uc - 0x10000) >> 10), i.e. the lead surrogate;
            # the low 10 bits go into the trail surrogate.
            push!(buf, UInt16(0xd7c0 + (uc >> 10)))
            push!(buf, UInt16(0xdc00 + (uc & 0x3ff)))
        else
            throw(UnicodeError(UTF_ERR_INVALID_CHAR, 0, uc))
        end
    end
    # Append a 0 code unit after the mapped characters (presumably the
    # terminator the UTF16String constructor expects -- confirm against it).
    push!(buf, 0)
    return UTF16String(buf)
end
18 changes: 18 additions & 0 deletions test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1897,3 +1897,21 @@ end
# Iterating a string with multi-byte characters: characters, their byte
# indices, and enumerate() pairs.  The first and last expected characters are
# 'ḟ' and 'ṙ', matching the string literal being iterated.
@test [c for c in "ḟøøƀäṙ"] == ['ḟ', 'ø', 'ø', 'ƀ', 'ä', 'ṙ']
@test [i for i in eachindex("ḟøøƀäṙ")] == [1, 4, 6, 8, 10, 12]
@test [x for x in enumerate("ḟøøƀäṙ")] == [(1, 'ḟ'), (2, 'ø'), (3, 'ø'), (4, 'ƀ'), (5, 'ä'), (6, 'ṙ')]

# Issue #11464: uppercase/lowercase must hand back the same string type they
# were given (a UTF16String used to come back as a UTF8String).
for (conv, T) in ((identity, ASCIIString), (utf8, UTF8String),
                  (utf16, UTF16String), (utf32, UTF32String))
    @test typeof(uppercase(conv("abcdef"))) == T
    @test typeof(lowercase(conv("ABCDEF"))) == T
end

# `map` over a UTF16String must reject mapping functions whose result cannot
# be encoded as UTF-16.
foomap(ch) = (ch > 65)        # result is a Bool, not a Char
foobar(ch) = Char(0xd800)     # surrogate code point
# First value past the Unicode range (> 0x10ffff).  0x20000 would NOT do here:
# it is a valid supplementary code point that encodes as a surrogate pair, so
# `map` would succeed instead of throwing.
foobaz(ch) = Char(0x110000)
@test_throws UnicodeError map(foomap, utf16("abcdef"))
@test_throws UnicodeError map(foobar, utf16("abcdef"))
@test_throws UnicodeError map(foobaz, utf16("abcdef"))

0 comments on commit 7e84bcf

Please sign in to comment.