From 301023b2b4888b57ec5a7b03d754aaafb09580a0 Mon Sep 17 00:00:00 2001 From: Brian White Date: Sun, 31 Jan 2016 13:54:18 -0500 Subject: [PATCH] querystring: improve parse() performance This commit improves parse() performance by ~20-200% with the various querystring-parse benchmarks. Some optimization strategies used in this commit include: * Combining multiple searches (for '&', '=', and '+') on the same string into a single loop * Avoiding string.split() * Minimizing creation of temporary strings * Avoiding string decoding if no encoded bytes were found and the default string decoder is being used PR-URL: https://github.com/nodejs/node/pull/5012 Reviewed-By: James M Snell Reviewed-By: Roman Reiss Reviewed-By: Matteo Collina --- benchmark/querystring/querystring-parse.js | 42 +++- lib/querystring.js | 212 +++++++++++++++------ test/parallel/test-querystring.js | 3 + 3 files changed, 193 insertions(+), 64 deletions(-) diff --git a/benchmark/querystring/querystring-parse.js b/benchmark/querystring/querystring-parse.js index 6a4d9f5e6169f4..6c912c0ac2e868 100644 --- a/benchmark/querystring/querystring-parse.js +++ b/benchmark/querystring/querystring-parse.js @@ -3,7 +3,13 @@ var querystring = require('querystring'); var v8 = require('v8'); var bench = common.createBenchmark(main, { - type: ['noencode', 'encodemany', 'encodelast', 'multivalue'], + type: ['noencode', + 'multicharsep', + 'encodemany', + 'encodelast', + 'multivalue', + 'multivaluemany', + 'manypairs'], n: [1e6], }); @@ -13,22 +19,38 @@ function main(conf) { var inputs = { noencode: 'foo=bar&baz=quux&xyzzy=thud', + multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud', encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d', encodelast: 'foo=bar&baz=quux&xyzzy=thu%64', - multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz' + multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', + multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' + + 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', + manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z' }; var input = inputs[type]; // Force-optimize querystring.parse() so that the benchmark doesn't get // disrupted by the optimizer kicking in halfway through. - for (var name in inputs) - querystring.parse(inputs[name]); - v8.setFlagsFromString('--allow_natives_syntax'); - eval('%OptimizeFunctionOnNextCall(querystring.parse)'); - - bench.start(); - for (var i = 0; i < n; i += 1) + if (type !== 'multicharsep') { querystring.parse(input); - bench.end(n); + eval('%OptimizeFunctionOnNextCall(querystring.parse)'); + querystring.parse(input); + } else { + querystring.parse(input, '&&&&&&&&&&'); + eval('%OptimizeFunctionOnNextCall(querystring.parse)'); + querystring.parse(input, '&&&&&&&&&&'); + } + + if (type !== 'multicharsep') { + bench.start(); + for (var i = 0; i < n; i += 1) + querystring.parse(input); + bench.end(n); + } else { + bench.start(); + for (var i = 0; i < n; i += 1) + querystring.parse(input, '&&&&&&&&&&'); + bench.end(n); + } } diff --git a/lib/querystring.js b/lib/querystring.js index 4244d8c18b8122..b3b6b84c76632f 100644 --- a/lib/querystring.js +++ b/lib/querystring.js @@ -78,13 +78,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) { }; -QueryString.unescape = function(s, decodeSpaces) { +function qsUnescape(s, decodeSpaces) { try { return decodeURIComponent(s); } catch (e) { return QueryString.unescapeBuffer(s, decodeSpaces).toString(); } -}; +} +QueryString.unescape = qsUnescape; var hexTable = new Array(256); @@ -198,63 +199,183 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) { return ''; }; -// Parse a key=val string. +// Parse a key/val string. QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { sep = sep || '&'; eq = eq || '='; - const eqLen = eq.length; - var obj = {}; + + const obj = {}; if (typeof qs !== 'string' || qs.length === 0) { return obj; } + if (typeof sep !== 'string') + sep += ''; + + const eqLen = eq.length; + const sepLen = sep.length; + var maxKeys = 1000; if (options && typeof options.maxKeys === 'number') { maxKeys = options.maxKeys; } - // maxKeys <= 0 means that we should not limit keys count - if (maxKeys > 0 && isFinite(maxKeys)) { - qs = qs.split(sep, maxKeys); - } else { - qs = qs.split(sep); - } - - var len = qs.length; + var pairs = Infinity; + if (maxKeys > 0) + pairs = maxKeys; var decode = QueryString.unescape; if (options && typeof options.decodeURIComponent === 'function') { decode = options.decodeURIComponent; } - - var keys = []; - for (var i = 0; i < len; ++i) { - // replacePlus() is used instead of a regexp because it is ~15-30% faster - // with v8 4.7 - const x = replacePlus(qs[i]); - const idx = x.indexOf(eq); - var k, v; - - if (idx >= 0) { - k = decodeStr(x.substring(0, idx), decode); - v = decodeStr(x.substring(idx + eqLen), decode); + const customDecode = (decode !== qsUnescape); + + const keys = []; + var lastPos = 0; + var sepIdx = 0; + var eqIdx = 0; + var key = ''; + var value = ''; + var keyEncoded = customDecode; + var valEncoded = customDecode; + var encodeCheck = 0; + for (var i = 0; i < qs.length; ++i) { + const code = qs.charCodeAt(i); + + // Try matching key/value pair separator (e.g. '&') + if (code === sep.charCodeAt(sepIdx)) { + if (++sepIdx === sepLen) { + // Key/value pair separator match! + const end = i - sepIdx + 1; + if (eqIdx < eqLen) { + // If we didn't find the key/value separator, treat the substring as + // part of the key instead of the value + if (lastPos < end) + key += qs.slice(lastPos, end); + } else if (lastPos < end) + value += qs.slice(lastPos, end); + if (keyEncoded) + key = decodeStr(key, decode); + if (valEncoded) + value = decodeStr(value, decode); + // Use a key array lookup instead of using hasOwnProperty(), which is + // slower + if (keys.indexOf(key) === -1) { + obj[key] = value; + keys[keys.length] = key; + } else { + const curValue = obj[key]; + // `instanceof Array` is used instead of Array.isArray() because it + // is ~15-20% faster with v8 4.7 and is safe to use because we are + // using it with values being created within this function + if (curValue instanceof Array) + curValue[curValue.length] = value; + else + obj[key] = [curValue, value]; + } + if (--pairs === 0) + break; + keyEncoded = valEncoded = customDecode; + encodeCheck = 0; + key = value = ''; + lastPos = i + 1; + sepIdx = eqIdx = 0; + } + continue; } else { - k = decodeStr(x, decode); - v = ''; + sepIdx = 0; + if (!valEncoded) { + // Try to match an (valid) encoded byte (once) to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 0 && + ((code >= 48/*0*/ && code <= 57/*9*/) || + (code >= 65/*A*/ && code <= 70/*Z*/) || + (code >= 97/*a*/ && code <= 102/*z*/))) { + if (++encodeCheck === 3) + valEncoded = true; + } else { + encodeCheck = 0; + } + } } - // Use a key array lookup instead of using hasOwnProperty(), which is slower - if (keys.indexOf(k) === -1) { - obj[k] = v; - keys.push(k); - } else if (obj[k] instanceof Array) { - // `instanceof Array` is used instead of Array.isArray() because it is - // ~15-20% faster with v8 4.7 and is safe to use because we are using it - // with values being created within this function - obj[k].push(v); + // Try matching key/value separator (e.g. '=') if we haven't already + if (eqIdx < eqLen) { + if (code === eq.charCodeAt(eqIdx)) { + if (++eqIdx === eqLen) { + // Key/value separator match! + const end = i - eqIdx + 1; + if (lastPos < end) + key += qs.slice(lastPos, end); + encodeCheck = 0; + lastPos = i + 1; + } + continue; + } else { + eqIdx = 0; + if (!keyEncoded) { + // Try to match an (valid) encoded byte once to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 0 && + ((code >= 48/*0*/ && code <= 57/*9*/) || + (code >= 65/*A*/ && code <= 70/*Z*/) || + (code >= 97/*a*/ && code <= 102/*z*/))) { + if (++encodeCheck === 3) + keyEncoded = true; + } else { + encodeCheck = 0; + } + } + } + } + + if (code === 43/*+*/) { + if (eqIdx < eqLen) { + if (i - lastPos > 0) + key += qs.slice(lastPos, i); + key += '%20'; + keyEncoded = true; + } else { + if (i - lastPos > 0) + value += qs.slice(lastPos, i); + value += '%20'; + valEncoded = true; + } + lastPos = i + 1; + } + } + + // Check if we have leftover key or value data + if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) { + if (lastPos < qs.length) { + if (eqIdx < eqLen) + key += qs.slice(lastPos); + else if (sepIdx < sepLen) + value += qs.slice(lastPos); + } + if (keyEncoded) + key = decodeStr(key, decode); + if (valEncoded) + value = decodeStr(value, decode); + // Use a key array lookup instead of using hasOwnProperty(), which is + // slower + if (keys.indexOf(key) === -1) { + obj[key] = value; + keys[keys.length] = key; } else { - obj[k] = [obj[k], v]; + const curValue = obj[key]; + // `instanceof Array` is used instead of Array.isArray() because it + // is ~15-20% faster with v8 4.7 and is safe to use because we are + // using it with values being created within this function + if (curValue instanceof Array) + curValue[curValue.length] = value; + else + obj[key] = [curValue, value]; } } @@ -262,23 +383,6 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { }; -function replacePlus(str) { - var ret = ''; - var start = 0; - var i = -1; - while ((i = str.indexOf('+', i + 1)) !== -1) { - ret += str.slice(start, i); - ret += '%20'; - start = i + 1; - } - if (start === 0) - return str; - if (start < str.length) - ret += str.slice(start); - return ret; -} - - // v8 does not optimize functions with try-catch blocks, so we isolate them here // to minimize the damage function decodeStr(s, decoder) { diff --git a/test/parallel/test-querystring.js b/test/parallel/test-querystring.js index b4388852499ed1..c8e9cc7050af5b 100644 --- a/test/parallel/test-querystring.js +++ b/test/parallel/test-querystring.js @@ -248,3 +248,6 @@ qs.unescape = function(str) { }; assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'}); qs.unescape = prevUnescape; + +// test separator and "equals" parsing order +assert.deepEqual(qs.parse('foo&bar', '&', '&'), { foo: '', bar: '' });