From 87b8f02daa5af5d0dac3f9f37b75ea9ce935fbdd Mon Sep 17 00:00:00 2001
From: Anna Henningsen
Date: Sun, 25 Aug 2019 03:07:09 +0200
Subject: [PATCH] lib: add ASCII fast path to getStringWidth()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A lot of strings that are going to be passed to `getStringWidth()`
are ASCII strings, for which the calculation is rather easy and
calling into C++ can be skipped.

                                                      confidence improvement accuracy (*)    (**)   (***)
    misc/getstringwidth.js n=100000 type='ascii'             ***    328.99 %      ±21.73% ±29.25% ±38.77%
    misc/getstringwidth.js n=100000 type='emojiseq'                   2.94 %       ±7.66% ±10.19% ±13.26%
    misc/getstringwidth.js n=100000 type='fullwidth'                  4.70 %       ±5.64%  ±7.50%  ±9.76%

PR-URL: https://github.com/nodejs/node/pull/29301
Reviewed-By: Gus Caplan
Reviewed-By: Trivikram Kamat
Reviewed-By: Ben Noordhuis
Reviewed-By: Colin Ihrig
Reviewed-By: James M Snell
Reviewed-By: Luigi Pinca
Reviewed-By: Minwoo Jung
Reviewed-By: Rich Trott
---
Review notes (commentary placed after the `---` separator; not part of the
commit message or of the diff below):

- lib/internal/readline/utils.js: integer input is still passed straight to
  icu.getStringWidth(), now with the third argument fixed to `false`.
  String input is scanned with charCodeAt(): code units 32..126 add 1 to
  the width, code units below 32 add 0, and at the first code unit >= 127
  the rest of the string (str.slice(i)) is handed to icu.getStringWidth()
  and the loop stops. A fully ASCII string therefore never reaches C++.
- benchmark/misc/getstringwidth.js: declares a 'mixed' input type in
  addition to the three types listed in the results table above.
- test/parallel/test-icu-stringwidth.js: compares the integer and string
  code paths for code points 0..255 under both ambiguousAsFullWidth values.

 benchmark/misc/getstringwidth.js      | 26 +++++++++++++++++++++
 lib/internal/readline/utils.js        | 33 +++++++++++++++++++++------
 test/parallel/test-icu-stringwidth.js | 22 ++++++++++++++++++
 3 files changed, 74 insertions(+), 7 deletions(-)
 create mode 100644 benchmark/misc/getstringwidth.js

diff --git a/benchmark/misc/getstringwidth.js b/benchmark/misc/getstringwidth.js
new file mode 100644
index 00000000000000..12f071c60dd7eb
--- /dev/null
+++ b/benchmark/misc/getstringwidth.js
@@ -0,0 +1,26 @@
+'use strict';
+
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+  type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'],
+  n: [10e4]
+}, {
+  flags: ['--expose-internals']
+});
+
+function main({ n, type }) {
+  const { getStringWidth } = require('internal/readline/utils');
+
+  const str = ({
+    ascii: 'foobar'.repeat(100),
+    mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100),
+    emojiseq: '👨‍👨‍👧‍👦👨‍👩‍👦‍👦👨‍👩‍👧‍👧👩‍👩‍👧‍👦'.repeat(10),
+    fullwidth: '你好'.repeat(150)
+  })[type];
+
+  bench.start();
+  for (let j = 0; j < n; j += 1)
+    getStringWidth(str);
+  bench.end(n);
+}
diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js
index c6cd13a6bd19eb..f72a03bb3915f4 100644
--- a/lib/internal/readline/utils.js
+++ b/lib/internal/readline/utils.js
@@ -34,13 +34,32 @@ if (internalBinding('config').hasIntl) {
   const icu = internalBinding('icu');
   getStringWidth = function getStringWidth(str, options) {
     options = options || {};
-    if (!Number.isInteger(str))
-      str = stripVTControlCharacters(String(str));
-    return icu.getStringWidth(
-      str,
-      Boolean(options.ambiguousAsFullWidth),
-      Boolean(options.expandEmojiSequence)
-    );
+    if (Number.isInteger(str)) {
+      // Provide information about the character with code point 'str'.
+      return icu.getStringWidth(
+        str,
+        Boolean(options.ambiguousAsFullWidth),
+        false
+      );
+    }
+    str = stripVTControlCharacters(String(str));
+    let width = 0;
+    for (let i = 0; i < str.length; i++) {
+      // Try to avoid calling into C++ by first handling the ASCII portion of
+      // the string. If it is fully ASCII, we skip the C++ part.
+      const code = str.charCodeAt(i);
+      if (code < 127) {
+        width += code >= 32;
+        continue;
+      }
+      width += icu.getStringWidth(
+        str.slice(i),
+        Boolean(options.ambiguousAsFullWidth),
+        Boolean(options.expandEmojiSequence)
+      );
+      break;
+    }
+    return width;
   };
 
   isFullWidthCodePoint = function isFullWidthCodePoint(code, options) {
diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js
index 0620d3af3934ca..48384f916d9126 100644
--- a/test/parallel/test-icu-stringwidth.js
+++ b/test/parallel/test-icu-stringwidth.js
@@ -69,3 +69,25 @@ assert.strictEqual(
 
 // Control chars and combining chars are zero
 assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);
+
+// Test that the fast path for ASCII characters yields results consistent
+// with the 'slow' path.
+for (const ambiguousAsFullWidth of [ false, true ]) {
+  for (let i = 0; i < 256; i++) {
+    const char = String.fromCharCode(i);
+    assert.strictEqual(
+      readline.getStringWidth(i, { ambiguousAsFullWidth }),
+      readline.getStringWidth(char, { ambiguousAsFullWidth }));
+    assert.strictEqual(
+      readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }),
+      readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2);
+
+    if (i < 32 || (i >= 127 && i < 160)) {  // Control character
+      assert.strictEqual(
+        readline.getStringWidth(i, { ambiguousAsFullWidth }), 0);
+    } else if (i < 127) {  // Regular ASCII character
+      assert.strictEqual(
+        readline.getStringWidth(i, { ambiguousAsFullWidth }), 1);
+    }
+  }
+}