Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: allow simdutf::convert_* functions to return zero #47471

Merged
merged 6 commits into from
Apr 9, 2023
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions src/inspector/node_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@ void builderAppendQuotedString(StringBuilder& builder,
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
// simdutf::convert_utf8_to_utf16 returns zero in case of error.
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
CHECK_EQ(expected_utf16_length, utf16_length);
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length,
&builder);
// We have that utf16_length == expected_utf16_length if and only
// if the input was a valid UTF-8 string.
if (utf16_length != 0) {
CHECK_EQ(expected_utf16_length, utf16_length);
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length,
&builder);
} // Otherwise, we had an invalid UTF-8 input.
}
builder.put('"');
}
Expand All @@ -35,8 +40,12 @@ std::unique_ptr<Value> parseJSON(const std::string_view string) {
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
// simdutf::convert_utf8_to_utf16 returns zero in case of error.
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
// We have that utf16_length == expected_utf16_length if and only
// if the input was a valid UTF-8 string.
if (utf16_length == 0) return nullptr; // We had an invalid UTF-8 input.
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast<const uint16_t*>(buffer.out()),
utf16_length);
Expand All @@ -62,9 +71,14 @@ String StringViewToUtf8(v8_inspector::StringView view) {
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(source, view.length());
MaybeStackBuffer<char> buffer(expected_utf8_length);
// convert_utf16_to_utf8 returns zero in case of error.
size_t utf8_length =
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
// We have that utf8_length == expected_utf8_length if and only
// if the input was a valid UTF-16 string. Otherwise, utf8_length
// must be zero.
assert(utf8_length == 0 || utf8_length == expected_utf8_length);
lemire marked this conversation as resolved.
Show resolved Hide resolved
// An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}

Expand Down Expand Up @@ -112,9 +126,14 @@ String fromUTF16(const uint16_t* data, size_t length) {
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(casted_data, length);
MaybeStackBuffer<char> buffer(expected_utf8_length);
// simdutf::convert_utf16_to_utf8 returns zero in case of error.
size_t utf8_length =
simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
CHECK_EQ(expected_utf8_length, utf8_length);
// We have that utf8_length == expected_utf8_length if and only
// if the input was a valid UTF-16 string. Otherwise, utf8_length
// must be zero.
assert(utf8_length == 0 || utf8_length == expected_utf8_length);
lemire marked this conversation as resolved.
Show resolved Hide resolved
// An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}

Expand All @@ -123,6 +142,9 @@ const uint8_t* CharactersUTF8(const std::string_view s) {
}

size_t CharacterCount(const std::string_view s) {
// The utf32_length_from_utf8 function calls count_utf8.
// The count_utf8 function counts the number of code points
// (characters) in the string, assuming that the string is valid Unicode.
// TODO(@anonrig): Test to make sure CharacterCount returns correctly.
return simdutf::utf32_length_from_utf8(s.data(), s.length());
}
Expand Down