-
Notifications
You must be signed in to change notification settings - Fork 30.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
src: replace custom ASCII validation with simdutf one #46271
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
#include "env-inl.h" | ||
#include "node_buffer.h" | ||
#include "node_errors.h" | ||
#include "simdutf.h" | ||
#include "util.h" | ||
|
||
#include <climits> | ||
|
@@ -467,60 +468,6 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate, | |
UNREACHABLE(); | ||
} | ||
|
||
|
||
|
||
|
||
static bool contains_non_ascii_slow(const char* buf, size_t len) { | ||
for (size_t i = 0; i < len; ++i) { | ||
if (buf[i] & 0x80) | ||
return true; | ||
} | ||
return false; | ||
} | ||
|
||
|
||
static bool contains_non_ascii(const char* src, size_t len) { | ||
if (len < 16) { | ||
return contains_non_ascii_slow(src, len); | ||
} | ||
|
||
const unsigned bytes_per_word = sizeof(uintptr_t); | ||
const unsigned align_mask = bytes_per_word - 1; | ||
const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask; | ||
|
||
if (unaligned > 0) { | ||
const unsigned n = bytes_per_word - unaligned; | ||
if (contains_non_ascii_slow(src, n)) | ||
return true; | ||
src += n; | ||
len -= n; | ||
} | ||
|
||
|
||
#if defined(_WIN64) || defined(_LP64) | ||
const uintptr_t mask = 0x8080808080808080ll; | ||
#else | ||
const uintptr_t mask = 0x80808080l; | ||
#endif | ||
|
||
const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src); | ||
|
||
for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) { | ||
if (srcw[i] & mask) | ||
return true; | ||
} | ||
|
||
const unsigned remainder = len & align_mask; | ||
if (remainder > 0) { | ||
const size_t offset = len - remainder; | ||
if (contains_non_ascii_slow(src + offset, remainder)) | ||
return true; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
|
||
static void force_ascii_slow(const char* src, char* dst, size_t len) { | ||
for (size_t i = 0; i < len; ++i) { | ||
dst[i] = src[i] & 0x7f; | ||
|
@@ -634,7 +581,8 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate, | |
} | ||
|
||
case ASCII: | ||
if (contains_non_ascii(buf, buflen)) { | ||
if (simdutf::validate_ascii_with_errors(buf, buflen).error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason for using the error version? Is the common case to have invalid ASCII? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The The common case is to not use this branch (ASCII) at all :) |
||
// The input contains non-ASCII bytes. | ||
char* out = node::UncheckedMalloc(buflen); | ||
if (out == nullptr) { | ||
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Complete aside: I have a distinct memory of writing this code, it looks like how I would write such code, yet git blame attributes it to Isaac S... huh. The human mind is a fickle thing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@bnoordhuis I double-checked out of interest (and also because it looks like code from you), and … you did! e325ace is all yours 🙂
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel vindicated now! Thanks for digging that up, Anna. :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Written almost ten years ago, and you still remember your code. I've nothing but respect for all of you.