Skip to content

Commit

Permalink
src: allow simdutf::convert_* functions to return zero
Browse files Browse the repository at this point in the history
PR-URL: #47471
Reviewed-By: Rich Trott <[email protected]>
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
  • Loading branch information
lemire authored and RafaelGSS committed Apr 13, 2023
1 parent c7ad5bb commit 78c7475
Showing 1 changed file with 28 additions and 6 deletions.
34 changes: 28 additions & 6 deletions src/inspector/node_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@ void builderAppendQuotedString(StringBuilder& builder,
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
// simdutf::convert_utf8_to_utf16 returns zero in case of error.
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
CHECK_EQ(expected_utf16_length, utf16_length);
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length,
&builder);
// We have that utf16_length == expected_utf16_length if and only
// if the input was a valid UTF-8 string.
if (utf16_length != 0) {
CHECK_EQ(expected_utf16_length, utf16_length);
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length,
&builder);
} // Otherwise, we had an invalid UTF-8 input.
}
builder.put('"');
}
Expand All @@ -35,8 +40,12 @@ std::unique_ptr<Value> parseJSON(const std::string_view string) {
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
// simdutf::convert_utf8_to_utf16 returns zero in case of error.
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
// We have that utf16_length == expected_utf16_length if and only
// if the input was a valid UTF-8 string.
if (utf16_length == 0) return nullptr; // We had an invalid UTF-8 input.
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length);
Expand All @@ -62,9 +71,14 @@ String StringViewToUtf8(v8_inspector::StringView view) {
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(source, view.length());
MaybeStackBuffer<char> buffer(expected_utf8_length);
// convert_utf16_to_utf8 returns zero in case of error.
size_t utf8_length =
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
// We have that utf8_length == expected_utf8_length if and only
// if the input was a valid UTF-16 string. Otherwise, utf8_length
// must be zero.
CHECK(utf8_length == 0 || utf8_length == expected_utf8_length);
// An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}

Expand Down Expand Up @@ -112,9 +126,14 @@ String fromUTF16(const uint16_t* data, size_t length) {
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(casted_data, length);
MaybeStackBuffer<char> buffer(expected_utf8_length);
// simdutf::convert_utf16_to_utf8 returns zero in case of error.
size_t utf8_length =
simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
// We have that utf8_length == expected_utf8_length if and only
// if the input was a valid UTF-16 string. Otherwise, utf8_length
// must be zero.
CHECK(utf8_length == 0 || utf8_length == expected_utf8_length);
// An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}

Expand All @@ -123,6 +142,9 @@ const uint8_t* CharactersUTF8(const std::string_view s) {
}

size_t CharacterCount(const std::string_view s) {
// The utf32_length_from_utf8 function calls count_utf8.
// The count_utf8 function counts the number of code points
// (characters) in the string, assuming that the string is valid Unicode.
// TODO(@anonrig): Test to make sure CharacterCount returns correctly.
return simdutf::utf32_length_from_utf8(s.data(), s.length());
}
Expand Down

0 comments on commit 78c7475

Please sign in to comment.