From 58c7eb853c2a8f1bfbb997f64e25639db51960a4 Mon Sep 17 00:00:00 2001 From: Rob Hogan Date: Mon, 15 Jul 2024 01:40:18 -0700 Subject: [PATCH] jsinspector: Support UTF-8 responses to CDP's IO.read (#45426) Summary: Pull Request resolved: https://github.com/facebook/react-native/pull/45426 The initial implementation of `Network.loadNetworkResource` and the accompanying `IO.read` (D54202854) base64-encodes all data as if it is binary. This is the more general case, and we'll continue to base64-encode non-text resources. In the common case of text resources (particularly JS and JSON), it'd be preferable to do as Chrome does and send UTF-8 over the wire directly. This has a few performance benefits: - Less CPU and RAM footprint on device (UTF-8 truncation is constant-time, fast, and in-place), similarly less decoding for the frontend. - 25% less data per chunk (base64 encodes 3 bytes as 4 characters), implies up to 25% fewer network round trips for large resources. It also has the benefit of being human-readable in the CDP protocol inspector. ## Determining whether data is text We use exactly Chromium's heuristic for this (code pointers in comments), which is based only on the `Content-Type` header, and assuming any text mime type is UTF-8. ## UTF-8 truncation The slight implementation complexity here is that `IO.read` requests may specify a maximum number of bytes, and so we must slice a raw buffer up into valid UTF-8 sequences. This turns out to be fairly simple and cheap: 1. Naively truncate the buffer, inspect the last byte 2. If the last byte has topmost bit =0, it's ASCII (single byte) and we're done. 3. Otherwise, look back at most 3 bytes to find the first byte of the code point (topmost bits 11), counting the number of "continuationBytes" at the end of our buffer. If we don't find one within 3 bytes then the string isn't UTF-8 - throw. 4. Read the code point length, which is encoded into the first byte. 5. 
Resize to remove the last code point fragment, unless it terminates correctly exactly at the end of our buffer. ## Edge cases + divergence from Chrome Chrome's behaviour here in at least one case is questionable and we intentionally differ: - If a response has the header "content-type: text/plain" but content e.g. `0x80` (not valid UTF-8), Chrome will respond to an `IO.read` with `{ "data": "", "base64Encoded": false, "eof": false }`, i.e. an empty string, but will move its internal pointer such that the next or some subsequent `IO.read` will have `"eof": true`. To the client, this is indistinguishable from a successfully received resource, when in fact it is effectively corrupted. - Instead, we respond with a CDP error to the `IO.read`. We do not immediately cancel the request or discard data, since not all `IO.read` errors are necessarily fatal. I've verified that Chrome DevTools (CDT) sends `IO.close` after an error, so we'll clean up that way (this isn't strictly guaranteed by any spec, but nor is `IO.close` after a resource is successfully consumed). 
Changelog: [General] Debugger: Support text responses to CDP `IO.read` requests Differential Revision: D58323790 --- .../jsinspector-modern/NetworkIOAgent.cpp | 66 ++++- .../ReactCommon/jsinspector-modern/Utf8.h | 56 +++++ .../tests/HostTargetTest.cpp | 228 +++++++++++++++++- .../jsinspector-modern/tests/Utf8.cpp | 58 +++++ 4 files changed, 393 insertions(+), 15 deletions(-) create mode 100644 packages/react-native/ReactCommon/jsinspector-modern/Utf8.h create mode 100644 packages/react-native/ReactCommon/jsinspector-modern/tests/Utf8.cpp diff --git a/packages/react-native/ReactCommon/jsinspector-modern/NetworkIOAgent.cpp b/packages/react-native/ReactCommon/jsinspector-modern/NetworkIOAgent.cpp index eb6a45c31703e7..1b46a9bfce01de 100644 --- a/packages/react-native/ReactCommon/jsinspector-modern/NetworkIOAgent.cpp +++ b/packages/react-native/ReactCommon/jsinspector-modern/NetworkIOAgent.cpp @@ -7,12 +7,23 @@ #include "NetworkIOAgent.h" #include +#include +#include "Utf8.h" namespace facebook::react::jsinspector_modern { static constexpr long DEFAULT_BYTES_PER_READ = 1048576; // 1MB (Chrome v112 default) +// https://github.com/chromium/chromium/blob/128.0.6593.1/content/browser/devtools/devtools_io_context.cc#L71-L73 +static constexpr std::array kTextMIMETypePrefixes{ + "text/", + "application/x-javascript", + "application/json", + "application/xml", + "application/javascript" // Not in Chromium but emitted by Metro +}; + namespace { struct InitStreamResult { @@ -103,6 +114,17 @@ class Stream : public NetworkRequestListener, } void onHeaders(int httpStatusCode, const Headers& headers) override { + // Find content-type through case-insensitive search of headers. 
+ for (const auto& [name, value] : headers) { + std::string lowerName = name; + std::transform( + lowerName.begin(), lowerName.end(), lowerName.begin(), ::tolower); + if (lowerName == "content-type") { + isText_ = isTextMimeType(value); + break; + }; + } + // If we've already seen an error, the initial callback as already been // called with it. if (initCb_) { @@ -181,16 +203,43 @@ class Stream : public NetworkRequestListener, std::vector buffer(maxBytesToRead); data_.read(buffer.data(), maxBytesToRead); auto bytesRead = data_.gcount(); + std::string output; + buffer.resize(bytesRead); + if (isText_) { + auto originalSize = buffer.size(); + // Maybe resize to drop the last 1-3 bytes so that buffer is valid. + truncateToValidUTF8(buffer); + if (buffer.size() < originalSize) { + // Rewind the stream so that the next read starts from the start of + // the code point we're removing from this chunk. + data_.seekg(buffer.size() - originalSize, std::ios_base::cur); + } + output = std::string(buffer.begin(), buffer.begin() + buffer.size()); + } else { + // Encode the slice as a base64 string. 
+ output = + folly::base64Encode(std::string_view(buffer.data(), buffer.size())); + } + return IOReadResult{ - .data = - folly::base64Encode(std::string_view(buffer.data(), buffer.size())), - .eof = bytesRead == 0 && completed_, - // TODO: Support UTF-8 string responses - .base64Encoded = true}; + .data = output, + .eof = output.length() == 0 && completed_, + .base64Encoded = !isText_}; + } + + // https://github.com/chromium/chromium/blob/128.0.6593.1/content/browser/devtools/devtools_io_context.cc#L70-L80 + static bool isTextMimeType(const std::string& mimeType) { + for (auto& kTextMIMETypePrefix : kTextMIMETypePrefixes) { + if (mimeType.starts_with(kTextMIMETypePrefix)) { + return true; + } + } + return false; } bool completed_{false}; + bool isText_{false}; std::optional error_; std::stringstream data_; long bytesReceived_{0}; @@ -338,8 +387,13 @@ void NetworkIOAgent::handleIoRead(const cdp::PreparsedRequest& req) { } else { it->second->read( size ? *size : DEFAULT_BYTES_PER_READ, - [requestId, frontendChannel = frontendChannel_](auto resultOrError) { + [requestId, + frontendChannel = frontendChannel_, + streamId, + streamsWeak = std::weak_ptr(streams_)](auto resultOrError) { if (auto* error = std::get_if(&resultOrError)) { + // NB: Chrome DevTools calls IO.close after a read error, so any + // continuing download or retained data is cleaned up at that point. frontendChannel(cdp::jsonError( requestId, cdp::ErrorCode::InternalError, *error)); } else if (auto* result = std::get_if(&resultOrError)) { diff --git a/packages/react-native/ReactCommon/jsinspector-modern/Utf8.h b/packages/react-native/ReactCommon/jsinspector-modern/Utf8.h new file mode 100644 index 00000000000000..8017abd32e6bc8 --- /dev/null +++ b/packages/react-native/ReactCommon/jsinspector-modern/Utf8.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace facebook::react::jsinspector_modern { + +/** + * Takes a vector of bytes representing a fragment of a UTF-8 string, and + * removes the minimum number (0-3) of trailing bytes so that the remainder is + * valid UTF-8. Useful for slicing binary data into UTF-8 strings. + * + * \param buffer Buffer to operate on - will be resized if necessary. + */ +inline void truncateToValidUTF8(std::vector& buffer) { + const auto length = buffer.size(); + // Ensure we don't cut a UTF-8 code point in the middle by removing any + // trailing bytes representing an incomplete UTF-8 code point. + + // If the last byte is a UTF-8 first byte or continuation byte (topmost bit + // is 1) (otherwise the last char is ASCII and we don't need to do + // anything). + if (length > 0 && (buffer[length - 1] & 0b10000000) == 0b10000000) { + int continuationBytes = 0; + // Find the first byte of the UTF-8 code point (topmost bits 11) and count + // the number of continuation bytes following it. + while ((buffer[length - continuationBytes - 1] & 0b11000000) != + 0b11000000) { + continuationBytes++; + if (continuationBytes > 3 || continuationBytes >= length - 1) { + throw std::runtime_error("Invalid UTF-8 sequence"); + } + } + char firstByteOfSequence = buffer[length - continuationBytes - 1]; + // Check for the special case that our original cut point was at the end + // of a UTF-8 code-point, and therefore already valid. This will be the + // case if the first byte indicates continuationBytes continuation bytes + // should follow, i.e. its top bits are (1+continuationBytes) 1's followed + // by a 0. 
+ char mask = static_cast(0b11111000 << (3 - continuationBytes)); + char expectedBitsAfterMask = static_cast(mask << 1); + if (continuationBytes == 0 || + (firstByteOfSequence & mask) != expectedBitsAfterMask) { + // Remove the trailing continuation bytes, if any, and the first byte. + buffer.resize(length - (continuationBytes + 1)); + } + } +} + +} // namespace facebook::react::jsinspector_modern diff --git a/packages/react-native/ReactCommon/jsinspector-modern/tests/HostTargetTest.cpp b/packages/react-native/ReactCommon/jsinspector-modern/tests/HostTargetTest.cpp index b99aca83bf095d..f52f1c812b1d9a 100644 --- a/packages/react-native/ReactCommon/jsinspector-modern/tests/HostTargetTest.cpp +++ b/packages/react-native/ReactCommon/jsinspector-modern/tests/HostTargetTest.cpp @@ -759,9 +759,9 @@ TEST_F(HostTargetTest, NetworkLoadNetworkResourceSuccess) { EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ "id": 2, "result": { - "data": "SGVsbG8sIFc=", + "data": "Hello, W", "eof": false, - "base64Encoded": true + "base64Encoded": false } })"))); @@ -773,9 +773,9 @@ TEST_F(HostTargetTest, NetworkLoadNetworkResourceSuccess) { EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ "id": 3, "result": { - "data": "b3JsZCE=", + "data": "orld!", "eof": false, - "base64Encoded": true + "base64Encoded": false } })"))); toPage_->sendMessage(R"({ @@ -787,6 +787,139 @@ TEST_F(HostTargetTest, NetworkLoadNetworkResourceSuccess) { } })"); + // No more data - expect empty payload with eof: true. + EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ + "id": 4, + "result": { + "data": "", + "eof": true, + "base64Encoded": false + } + })"))); + toPage_->sendMessage(R"({ + "id": 4, + "method": "IO.read", + "params": { + "handle": "0", + "size": 8 + } + })"); + + executor([](NetworkRequestListener& listener) { listener.onCompletion(); }); + + // Close the stream. 
+ EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ + "id": 5, + "result": {} + })"))); + toPage_->sendMessage(R"({ + "id": 5, + "method": "IO.close", + "params": { + "handle": "0" + } + })"); +} + +TEST_F(HostTargetTest, NetworkLoadNetworkResourceBinaryData) { + connect(); + + InSequence s; + + ScopedExecutor executor; + EXPECT_CALL( + hostTargetDelegate_, + loadNetworkResource( + Field(&LoadNetworkResourceRequest::url, "http://example.com"), _)) + .Times(1) + .WillOnce([&executor]( + const LoadNetworkResourceRequest& /*params*/, + ScopedExecutor executorArg) { + // Capture the ScopedExecutor to use later. + executor = std::move(executorArg); + }) + .RetiresOnSaturation(); + + // Load the resource, expect a CDP response as soon as headers are received. + toPage_->sendMessage(R"({ + "id": 1, + "method": "Network.loadNetworkResource", + "params": { + "url": "http://example.com" + } + })"); + + EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ + "id": 1, + "result": { + "resource": { + "success": true, + "stream": "0", + "httpStatusCode": 200, + "headers": { + "Content-Type": "application/octet-stream" + } + } + } + })"))); + + executor([](NetworkRequestListener& listener) { + // Arbitrary binary data. + listener.onHeaders( + 200, Headers{{"Content-Type", "application/octet-stream"}}); + }); + + // Retrieve the first chunk of data. + toPage_->sendMessage(R"({ + "id": 2, + "method": "IO.read", + "params": { + "handle": "0", + "size": 4 + } + })"); + + EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ + "id": 2, + "result": { + "data": "3q2+7w==", + "eof": false, + "base64Encoded": true + } + })"))); + + executor([](NetworkRequestListener& listener) { + std::array binaryData = { + '\xDE', + '\xAD', + '\xBE', + '\xEF', + '\x00', + '\x11', + '\x22', + '\x33', + }; + listener.onData(std::string_view(binaryData.data(), binaryData.size())); + }); + + // Retrieve the remaining data. 
+ EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ + "id": 3, + "result": { + "data": "ABEiMw==", + "eof": false, + "base64Encoded": true + } + })"))); + toPage_->sendMessage(R"({ + "id": 3, + "method": "IO.read", + "params": { + "handle": "0", + "size": 4 + } + })"); + // No more data - expect empty payload with eof: true. EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ "id": 4, @@ -821,6 +954,83 @@ TEST_F(HostTargetTest, NetworkLoadNetworkResourceSuccess) { })"); } +TEST_F(HostTargetTest, NetworkLoadNetworkResourceMimeIsTextContentIsNot) { + connect(); + + InSequence s; + + ScopedExecutor executor; + EXPECT_CALL( + hostTargetDelegate_, + loadNetworkResource( + Field(&LoadNetworkResourceRequest::url, "http://example.com"), _)) + .Times(1) + .WillOnce([&executor]( + const LoadNetworkResourceRequest& /*params*/, + ScopedExecutor executorArg) { + // Capture the ScopedExecutor to use later. + executor = std::move(executorArg); + }) + .RetiresOnSaturation(); + + // Load the resource, expect a CDP response as soon as headers are received. + toPage_->sendMessage(R"({ + "id": 1, + "method": "Network.loadNetworkResource", + "params": { + "url": "http://example.com" + } + })"); + + EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ + "id": 1, + "result": { + "resource": { + "success": true, + "stream": "0", + "httpStatusCode": 200, + "headers": { + "Content-Type": "text/plain" + } + } + } + })"))); + + executor([](NetworkRequestListener& listener) { + // Claim text/plain... + listener.onHeaders(200, Headers{{"Content-Type", "text/plain"}}); + }); + + // Retrieve the first chunk of data. 
+ toPage_->sendMessage(R"({ + "id": 2, + "method": "IO.read", + "params": { + "handle": "0", + "size": 4 + } + })"); + + EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ + "id": 2, + "error": { + "message": "Invalid UTF-8 sequence", + "code": -32603 + } + })"))); + + executor([](NetworkRequestListener& listener) { + std::array binaryData = { + '\x80', + '\x80', + '\x80', + '\x80', + }; + // Actually emit binary that cannot be represented as UTF-8. + listener.onData(std::string_view(binaryData.data(), binaryData.size())); + }); +} + TEST_F(HostTargetTest, NetworkLoadNetworkResourceStreamInterrupted) { connect(); @@ -1071,7 +1281,7 @@ TEST_F(HostTargetTest, NetworkLoadNetworkResourceStreamClosed) { "stream": "0", "httpStatusCode": 200, "headers": { - "x-test": "foo" + "content-type": "text/plain" } } } @@ -1082,7 +1292,7 @@ TEST_F(HostTargetTest, NetworkLoadNetworkResourceStreamClosed) { listener.setCancelFunction( [&cancelFunctionCalled]() { cancelFunctionCalled = true; }); - listener.onHeaders(200, Headers{{"x-test", "foo"}}); + listener.onHeaders(200, Headers{{"content-type", "text/plain"}}); }); // Retrieve the first chunk of data. @@ -1091,16 +1301,16 @@ TEST_F(HostTargetTest, NetworkLoadNetworkResourceStreamClosed) { "method": "IO.read", "params": { "handle": "0", - "size": 20 + "size": 22 } })"); EXPECT_CALL(fromPage(), onMessage(JsonEq(R"({ "id": 2, "result": { - "data": "VGhlIG1lYW5pbmcgb2YgbGlmZSA=", + "data": "The meaning of life is", "eof": false, - "base64Encoded": true + "base64Encoded": false } })"))); executor([](NetworkRequestListener& listener) { diff --git a/packages/react-native/ReactCommon/jsinspector-modern/tests/Utf8.cpp b/packages/react-native/ReactCommon/jsinspector-modern/tests/Utf8.cpp new file mode 100644 index 00000000000000..3b8c20a1871063 --- /dev/null +++ b/packages/react-native/ReactCommon/jsinspector-modern/tests/Utf8.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "../Utf8.h" + +#include +#include + +using namespace ::testing; + +namespace facebook::react::jsinspector_modern { + +TEST(Utf8Test, TruncateToValidUtf8) { + auto buffer = std::vector(); + std::vector> expectedStringsUpToSizes; + // Construct a buffer with a concatenation of all code points, and a vector + // or "expectedStringsUpToSizes", pairs of valid UTF8 prefix strings and sizes + // "n" such that the string would be the expected truncation of the first "n" + // bytes of the buffer. + for (const std::string& codePoint : { + "a", // 1 byte + "é", // 2 bytes + "✓", // 3 bytes + "😀" // 4 bytes + }) { + auto partial = std::string(buffer.data(), buffer.size()); + buffer.insert(buffer.end(), codePoint.begin(), codePoint.end()); + expectedStringsUpToSizes.push_back(std::pair(partial, buffer.size())); + } + // The constructed buffer is 10 bytes long, comprised of 4 code points of + // varied size. Range over naive slices of length 0-9 ensuring that the + // truncated result matches the valid UTF8 substring of length <= n. + size_t n = 0; + for (const auto& expectedStringUpToSize : expectedStringsUpToSizes) { + auto nextSize = expectedStringUpToSize.second; + auto expectedString = expectedStringUpToSize.first; + for (; n < nextSize; ++n) { + // Take the first n bytes of the whole buffer, which may be slicing + // through the middle of a code point. + std::vector slice(buffer.begin(), buffer.begin() + n); + truncateToValidUTF8(slice); + // Expect the final code point fragment has been discarded and that the + // contents are equal to expectedString, which is valid UTF8. + EXPECT_EQ(std::string(slice.begin(), slice.end()), expectedString); + } + } + // Finally verify that truncating the whole buffer, which is already valid + // UTF8, is a no-op. 
+ auto wholeString = std::string(buffer.begin(), buffer.end()); + truncateToValidUTF8(buffer); + EXPECT_EQ(std::string(buffer.begin(), buffer.end()), wholeString); +} + +} // namespace facebook::react::jsinspector_modern