Skip to content

Commit

Permalink
Simplify + optimize lengthBytesUTF8. Don't fully decode surrogates (#…
Browse files (browse the repository at this point in the history)
  • Loading branch information
MaxGraey authored Jul 17, 2022
1 parent 14c106a commit 8c047b1
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions src/runtime_strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,16 @@ function lengthBytesUTF8(str) {
for (var i = 0; i < str.length; ++i) {
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character! So decode UTF16->UTF32->UTF8.
// See http://unicode.org/faq/utf_bom.html#utf16-3
var u = str.charCodeAt(i); // possibly a lead surrogate
if (u >= 0xD800 && u <= 0xDFFF) u = 0x10000 + ((u & 0x3FF) << 10) | (str.charCodeAt(++i) & 0x3FF);
if (u <= 0x7F) ++len;
else if (u <= 0x7FF) len += 2;
else if (u <= 0xFFFF) len += 3;
else len += 4;
var c = str.charCodeAt(i); // possibly a lead surrogate
if (c <= 0x7F) {
len++;
} else if (c <= 0x7FF) {
len += 2;
} else if (c >= 0xD800 && c <= 0xDFFF) {
len += 4; ++i;
} else {
len += 3;
}
}
return len;
}

0 comments on commit 8c047b1

Please sign in to comment.