From 7e18f2ec62682d847c5d40dd5eda16066e00d116 Mon Sep 17 00:00:00 2001 From: Bryon Leung Date: Wed, 7 Oct 2015 13:47:57 -0400 Subject: [PATCH] src: add BE support to StringBytes::Encode() Versions of Node.js after v0.12 have relocated byte-swapping away from the StringBytes::Encode function, thereby causing a nan test (which accesses this function directly) to fail on big-endian machines. This change re-introduces byte swapping in StringBytes::Encode, done via a call to a function in util-inl. Another change in NodeBuffer::StringSlice was necessary to avoid double byte swapping in big-endian function calls to StringSlice. PR-URL: https://github.com/nodejs/node/pull/3410 Reviewed-By: Ben Noordhuis Reviewed-By: Trevor Norris --- src/node_buffer.cc | 9 ++++++--- src/string_bytes.cc | 16 ++++++++++++---- src/util-inl.h | 14 ++++++++++++++ src/util.h | 2 ++ 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 8b850700792a5b..dca75a817b7414 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -477,10 +477,11 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) { // need to reorder on BE platforms. See http://nodejs.org/api/buffer.html // regarding Node's "ucs2" encoding specification. const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0); - if (IsLittleEndian() && aligned) { - buf = reinterpret_cast<const uint16_t*>(data); - } else { + if (IsLittleEndian() && !aligned) { // Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte(). + // This applies ONLY to little endian platforms, as misalignment will be + // handled by a byte-swapping operation in StringBytes::Encode on + // big endian platforms. uint16_t* copy = new uint16_t[length]; for (size_t i = 0, k = 0; i < length; i += 1, k += 2) { // Assumes that the input is little endian. 
@@ -490,6 +491,8 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) { } buf = copy; release = true; + } else { + buf = reinterpret_cast<const uint16_t*>(data); } args.GetReturnValue().Set(StringBytes::Encode(env->isolate(), buf, length)); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 90fafd40cd3379..a916caf75e8960 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -6,6 +6,7 @@ #include <limits.h> #include <string.h> // memcpy +#include <vector> // When creating strings >= this length v8's gc spins up and consumes // most of the execution time. For these cases it's more performant to @@ -406,9 +407,7 @@ size_t StringBytes::Write(Isolate* isolate, reinterpret_cast<uintptr_t>(buf) % sizeof(uint16_t); if (is_aligned) { uint16_t* const dst = reinterpret_cast<uint16_t*>(buf); - for (size_t i = 0; i < nchars; i++) - dst[i] = dst[i] << 8 | dst[i] >> 8; - break; + SwapBytes(dst, dst, nchars); } ASSERT_EQ(sizeof(uint16_t), 2); @@ -857,7 +856,16 @@ Local<Value> StringBytes::Encode(Isolate* isolate, const uint16_t* buf, size_t buflen) { Local<String> val; - + std::vector<uint16_t> dst; + if (IsBigEndian()) { + // Node's "ucs2" encoding expects LE character data inside a + // Buffer, so we need to reorder on BE platforms. 
See + // http://nodejs.org/api/buffer.html regarding Node's "ucs2" + // encoding specification + dst.resize(buflen); + SwapBytes(&dst[0], buf, buflen); + buf = &dst[0]; + } if (buflen < EXTERN_APEX) { val = String::NewFromTwoByte(isolate, buf, diff --git a/src/util-inl.h b/src/util-inl.h index 75bdb4784aaaf3..669b7e7535884b 100644 --- a/src/util-inl.h +++ b/src/util-inl.h @@ -198,6 +198,20 @@ TypeName* Unwrap(v8::Local object) { return static_cast(pointer); } +void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen) { + for (size_t i = 0; i < buflen; i++) { + // __builtin_bswap16 generates more efficient code with + // g++ 4.8 on PowerPC and other big-endian archs +#ifdef __GNUC__ + dst[i] = __builtin_bswap16(src[i]); +#else + dst[i] = (src[i] << 8) | (src[i] >> 8); +#endif + } +} + + + } // namespace node #endif // SRC_UTIL_INL_H_ diff --git a/src/util.h b/src/util.h index 20cd54758cb102..7b2bc0f1a270ba 100644 --- a/src/util.h +++ b/src/util.h @@ -176,6 +176,8 @@ inline void ClearWrap(v8::Local object); template inline TypeName* Unwrap(v8::Local object); +inline void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen); + class Utf8Value { public: explicit Utf8Value(v8::Isolate* isolate, v8::Local value);