From ad38866fc5aca144ce0b4fdb535be06ce7edf3c8 Mon Sep 17 00:00:00 2001 From: Robert Nagy Date: Thu, 5 Sep 2024 00:34:29 +0200 Subject: [PATCH] buffer: re-enable Fast API for Buffer.write Re-enables the Fast API for Buffer.write after fixing UTF8 handling. Fixes: https://github.com/nodejs/node/issues/54521 PR-URL: https://github.com/nodejs/node/pull/54526 Reviewed-By: Daniel Lemire Reviewed-By: Benjamin Gruenbaum Reviewed-By: James M Snell Reviewed-By: Anna Henningsen Reviewed-By: Yagiz Nizipli Reviewed-By: Paolo Insogna --- src/node_buffer.cc | 134 +++++++++++++++++++++--- test/parallel/test-buffer-write-fast.js | 45 ++++++++ 2 files changed, 165 insertions(+), 14 deletions(-) create mode 100644 test/parallel/test-buffer-write-fast.js diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 36917f755554223..42bd7b42d398ecd 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -22,6 +22,7 @@ #include "node_buffer.h" #include "node.h" #include "node_blob.h" +#include "node_debug.h" #include "node_errors.h" #include "node_external_reference.h" #include "node_i18n.h" @@ -1442,6 +1443,79 @@ void CopyArrayBuffer(const FunctionCallbackInfo& args) { memcpy(dest, src, bytes_to_copy); } +size_t convert_latin1_to_utf8_s(const char* src, + size_t src_len, + char* dst, + size_t dst_len) noexcept { + size_t src_pos = 0; + size_t dst_pos = 0; + + const auto safe_len = std::min(src_len, dst_len >> 1); + if (safe_len > 16) { + // convert_latin1_to_utf8 will never write more than input length * 2. + dst_pos += simdutf::convert_latin1_to_utf8(src, safe_len, dst); + src_pos += safe_len; + } + + // Based on: + // https://github.com/simdutf/simdutf/blob/master/src/scalar/latin1_to_utf8/latin1_to_utf8.h + // with an upper limit on the number of bytes to write. 
+ + const auto src_ptr = reinterpret_cast(src); + const auto dst_ptr = reinterpret_cast(dst); + + size_t skip_pos = src_pos; + while (src_pos < src_len && dst_pos < dst_len) { + if (skip_pos <= src_pos && src_pos + 16 <= src_len && + dst_pos + 16 <= dst_len) { + uint64_t v1; + memcpy(&v1, src_ptr + src_pos + 0, 8); + uint64_t v2; + memcpy(&v2, src_ptr + src_pos + 8, 8); + if (((v1 | v2) & UINT64_C(0x8080808080808080)) == 0) { + memcpy(dst_ptr + dst_pos, src_ptr + src_pos, 16); + dst_pos += 16; + src_pos += 16; + } else { + skip_pos = src_pos + 16; + } + } else { + const auto byte = src_ptr[src_pos++]; + if ((byte & 0x80) == 0) { + dst_ptr[dst_pos++] = byte; + } else if (dst_pos + 2 <= dst_len) { + dst_ptr[dst_pos++] = (byte >> 6) | 0b11000000; + dst_ptr[dst_pos++] = (byte & 0b111111) | 0b10000000; + } else { + break; + } + } + } + + return dst_pos; +} + +template +uint32_t WriteOneByteString(const char* src, + uint32_t src_len, + char* dst, + uint32_t dst_len) { + if (dst_len == 0) { + return 0; + } + + if (encoding == UTF8) { + return convert_latin1_to_utf8_s(src, src_len, dst, dst_len); + } else if (encoding == LATIN1 || encoding == ASCII) { + const auto size = std::min(src_len, dst_len); + memcpy(dst, src, size); + return size; + } else { + // TODO(ronag): Add support for more encoding. 
+ UNREACHABLE(); + } +} + template void SlowWriteString(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); @@ -1464,11 +1538,22 @@ void SlowWriteString(const FunctionCallbackInfo& args) { if (max_length == 0) return args.GetReturnValue().Set(0); - uint32_t written = StringBytes::Write( - env->isolate(), ts_obj_data + offset, max_length, str, encoding); + uint32_t written = 0; + + if ((encoding == UTF8 || encoding == LATIN1 || encoding == ASCII) && + str->IsExternalOneByte()) { + const auto src = str->GetExternalOneByteStringResource(); + written = WriteOneByteString( + src->data(), src->length(), ts_obj_data + offset, max_length); + } else { + written = StringBytes::Write( + env->isolate(), ts_obj_data + offset, max_length, str, encoding); + } + args.GetReturnValue().Set(written); } +template uint32_t FastWriteString(Local receiver, const v8::FastApiTypedArray& dst, const v8::FastOneByteString& src, @@ -1478,16 +1563,21 @@ uint32_t FastWriteString(Local receiver, CHECK(dst.getStorageIfAligned(&dst_data)); CHECK(offset <= dst.length()); CHECK(dst.length() - offset <= std::numeric_limits::max()); + TRACK_V8_FAST_API_CALL("buffer.writeString"); - const auto size = std::min( - {static_cast(dst.length() - offset), max_length, src.length}); - - memcpy(dst_data + offset, src.data, size); - - return size; + return WriteOneByteString( + src.data, + src.length, + reinterpret_cast(dst_data + offset), + std::min(dst.length() - offset, max_length)); } -static v8::CFunction fast_write_string(v8::CFunction::Make(FastWriteString)); +static v8::CFunction fast_write_string_ascii( + v8::CFunction::Make(FastWriteString)); +static v8::CFunction fast_write_string_latin1( + v8::CFunction::Make(FastWriteString)); +static v8::CFunction fast_write_string_utf8( + v8::CFunction::Make(FastWriteString)); void Initialize(Local target, Local unused, @@ -1554,9 +1644,21 @@ void Initialize(Local target, SetMethod(context, target, "hexWrite", StringWrite); 
SetMethod(context, target, "ucs2Write", StringWrite); - SetMethod(context, target, "asciiWriteStatic", SlowWriteString); - SetMethod(context, target, "latin1WriteStatic", SlowWriteString); - SetMethod(context, target, "utf8WriteStatic", SlowWriteString); + SetFastMethod(context, + target, + "asciiWriteStatic", + SlowWriteString, + &fast_write_string_ascii); + SetFastMethod(context, + target, + "latin1WriteStatic", + SlowWriteString, + &fast_write_string_latin1); + SetFastMethod(context, + target, + "utf8WriteStatic", + SlowWriteString, + &fast_write_string_utf8); SetMethod(context, target, "getZeroFillToggle", GetZeroFillToggle); } @@ -1601,8 +1703,12 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(SlowWriteString); registry->Register(SlowWriteString); registry->Register(SlowWriteString); - registry->Register(fast_write_string.GetTypeInfo()); - registry->Register(FastWriteString); + registry->Register(FastWriteString); + registry->Register(fast_write_string_ascii.GetTypeInfo()); + registry->Register(FastWriteString); + registry->Register(fast_write_string_latin1.GetTypeInfo()); + registry->Register(FastWriteString); + registry->Register(fast_write_string_utf8.GetTypeInfo()); registry->Register(StringWrite); registry->Register(StringWrite); registry->Register(StringWrite); diff --git a/test/parallel/test-buffer-write-fast.js b/test/parallel/test-buffer-write-fast.js new file mode 100644 index 000000000000000..4594934f75838ca --- /dev/null +++ b/test/parallel/test-buffer-write-fast.js @@ -0,0 +1,45 @@ +// Flags: --expose-internals --no-warnings --allow-natives-syntax +'use strict'; + +const common = require('../common'); +const assert = require('assert'); + +const { internalBinding } = require('internal/test/binding'); + +function testFastUtf8Write() { + { + const buf = Buffer.from('\x80'); + + assert.strictEqual(buf[0], 194); + assert.strictEqual(buf[1], 128); + } + + { + const buf = Buffer.alloc(64); + const newBuf = 
buf.subarray(0, buf.write('éñüçßÆ')); + assert.deepStrictEqual(newBuf, Buffer.from([195, 169, 195, 177, 195, 188, 195, 167, 195, 159, 195, 134])); + } + + { + const buf = Buffer.alloc(64); + const newBuf = buf.subarray(0, buf.write('¿')); + assert.deepStrictEqual(newBuf, Buffer.from([194, 191])); + } + + { + const buf = Buffer.from(new ArrayBuffer(34), 0, 16); + const str = Buffer.from([50, 83, 127, 39, 104, 8, 74, 65, 108, 123, 5, 4, 82, 10, 7, 53]).toString(); + const newBuf = buf.subarray(0, buf.write(str)); + assert.deepStrictEqual(newBuf, Buffer.from([ 50, 83, 127, 39, 104, 8, 74, 65, 108, 123, 5, 4, 82, 10, 7, 53])); + } +} + +eval('%PrepareFunctionForOptimization(Buffer.prototype.utf8Write)'); +testFastUtf8Write(); +eval('%OptimizeFunctionOnNextCall(Buffer.prototype.utf8Write)'); +testFastUtf8Write(); + +if (common.isDebug) { + const { getV8FastApiCallCount } = internalBinding('debug'); + assert(getV8FastApiCallCount('buffer.writeString'), 4); +}