From 341770fedf77ff5b8e0c646070029152b58fc746 Mon Sep 17 00:00:00 2001 From: Ruben Bridgewater Date: Wed, 14 Feb 2018 23:48:35 +0100 Subject: [PATCH] lib: improve normalize encoding performance This focuses on the common case by making sure they are prioritized. It also changes some typeof checks to test for undefined since that is faster and it adds a benchmark. PR-URL: https://github.com/nodejs/node/pull/18790 Reviewed-By: James M Snell Reviewed-By: Benjamin Gruenbaum Reviewed-By: Matteo Collina --- .../buffers/buffer-normalize-encoding.js | 43 +++++++++++ lib/buffer.js | 4 +- lib/internal/util.js | 77 ++++++++++++------- lib/string_decoder.js | 10 ++- 4 files changed, 101 insertions(+), 33 deletions(-) create mode 100644 benchmark/buffers/buffer-normalize-encoding.js diff --git a/benchmark/buffers/buffer-normalize-encoding.js b/benchmark/buffers/buffer-normalize-encoding.js new file mode 100644 index 00000000000000..7a820465bd5d6b --- /dev/null +++ b/benchmark/buffers/buffer-normalize-encoding.js @@ -0,0 +1,43 @@ +'use strict'; + +const common = require('../common.js'); + +const bench = common.createBenchmark(main, { + encoding: [ + 'ascii', + 'ASCII', + 'base64', + 'BASE64', + 'binary', + 'BINARY', + 'hex', + 'HEX', + 'latin1', + 'LATIN1', + 'ucs-2', + 'UCS-2', + 'ucs2', + 'UCS2', + 'utf-16le', + 'UTF-16LE', + 'utf-8', + 'UTF-8', + 'utf16le', + 'UTF16LE', + 'utf8', + 'UTF8' + ], + n: [1e6] +}, { + flags: ['--expose-internals'] +}); + +function main({ encoding, n }) { + const { normalizeEncoding } = require('internal/util'); + + bench.start(); + for (var i = 0; i < n; i++) { + normalizeEncoding(encoding); + } + bench.end(n); +} diff --git a/lib/buffer.js b/lib/buffer.js index 07bd63c0ae5b97..68cebedcc97ef4 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -242,7 +242,7 @@ function assertSize(size) { err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size); } - if (err) { + if (err !== null) { Error.captureStackTrace(err, assertSize); throw err; } @@ 
-428,7 +428,7 @@ Buffer.compare = function compare(a, b) { Buffer.isEncoding = function isEncoding(encoding) { return typeof encoding === 'string' && - typeof normalizeEncoding(encoding) === 'string'; + normalizeEncoding(encoding) !== undefined; }; Buffer[kIsEncodingSymbol] = Buffer.isEncoding; diff --git a/lib/internal/util.js b/lib/internal/util.js index 2516b84f342cea..b144063ee50100 100644 --- a/lib/internal/util.js +++ b/lib/internal/util.js @@ -96,36 +96,59 @@ function assertCrypto() { throw new errors.Error('ERR_NO_CRYPTO'); } -// The loop should only run at most twice, retrying with lowercased enc -// if there is no match in the first pass. -// We use a loop instead of branching to retry with a helper -// function in order to avoid the performance hit. // Return undefined if there is no match. +// Move the "slow cases" to a separate function to make sure this function gets +// inlined properly. That prioritizes the common case. function normalizeEncoding(enc) { - if (enc == null || enc === '') return 'utf8'; - let retried; - while (true) { - switch (enc) { - case 'utf8': - case 'utf-8': - return 'utf8'; - case 'ucs2': - case 'ucs-2': - case 'utf16le': - case 'utf-16le': + if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8'; + return slowCases(enc); +} + +function slowCases(enc) { + switch (enc.length) { + case 4: + if (enc === 'UTF8') return 'utf8'; + if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le'; + enc = `${enc}`.toLowerCase(); + if (enc === 'utf8') return 'utf8'; + if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le'; + break; + case 3: + if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex') + return 'hex'; + break; + case 5: + if (enc === 'ascii') return 'ascii'; + if (enc === 'ucs-2') return 'utf16le'; + if (enc === 'UTF-8') return 'utf8'; + if (enc === 'ASCII') return 'ascii'; + if (enc === 'UCS-2') return 'utf16le'; + enc = `${enc}`.toLowerCase(); + if (enc === 'utf-8') return 'utf8'; + if (enc === 'ascii') 
return 'ascii'; + if (enc === 'ucs-2') return 'utf16le'; + break; + case 6: + if (enc === 'base64') return 'base64'; + if (enc === 'latin1' || enc === 'binary') return 'latin1'; + if (enc === 'BASE64') return 'base64'; + if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1'; + enc = `${enc}`.toLowerCase(); + if (enc === 'base64') return 'base64'; + if (enc === 'latin1' || enc === 'binary') return 'latin1'; + break; + case 7: + if (enc === 'utf16le' || enc === 'UTF16LE' || + `${enc}`.toLowerCase() === 'utf16le') return 'utf16le'; - case 'latin1': - case 'binary': - return 'latin1'; - case 'base64': - case 'ascii': - case 'hex': - return enc; - default: - if (retried) return; // undefined - enc = ('' + enc).toLowerCase(); - retried = true; - } + break; + case 8: + if (enc === 'utf-16le' || enc === 'UTF-16LE' || + `${enc}`.toLowerCase() === 'utf-16le') + return 'utf16le'; + break; + default: + if (enc === '') return 'utf8'; } } diff --git a/lib/string_decoder.js b/lib/string_decoder.js index 04d31b2607c63e..18097be0e6dd08 100644 --- a/lib/string_decoder.js +++ b/lib/string_decoder.js @@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder'); // modules monkey-patch it to support additional encodings function normalizeEncoding(enc) { const nenc = internalUtil.normalizeEncoding(enc); - if (typeof nenc !== 'string' && - (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))) - throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc); - return nenc || enc; + if (nenc === undefined) { + if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)) + throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc); + return enc; + } + return nenc; } const encodingsMap = {};