From 4fee9554ac0ca73dca6410a493ae366910638f80 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Feb 2017 13:20:39 -0800 Subject: [PATCH] url: fix surrogate handling in encodeAuth() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the legacy URL stringifier miscalculates the offset of an extra surrogate, causing the high surrogate to be included unescaped: > url.format({ auth: '\uD83D\uDE00', hostname: 'a' }) '//%F0%9F%98%80�@a' PR-URL: https://github.com/nodejs/node/pull/11387 Backport-of: https://github.com/nodejs/node/pull/11161 --- lib/url.js | 50 +++++++++++++++++++++++---------------- test/parallel/test-url.js | 13 +++++++++- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/lib/url.js b/lib/url.js index 1437443e758d3b..59eb82b54ffb46 100644 --- a/lib/url.js +++ b/lib/url.js @@ -948,6 +948,24 @@ function spliceOne(list, index) { var hexTable = new Array(256); for (var i = 0; i < 256; ++i) hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); + +// These characters do not need escaping: +// ! - . _ ~ +// ' ( ) * : +// digits +// alpha (uppercase) +// alpha (lowercase) +const noEscapeAuth = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F + 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 // 0x70 - 0x7F +]; + function encodeAuth(str) { // faster encodeURIComponent alternative for encoding auth uri components var out = ''; @@ -955,37 +973,28 @@ function encodeAuth(str) { for (var i = 0; i < str.length; ++i) { var c = str.charCodeAt(i); - // These characters do not need escaping: - // ! - . _ ~ - // ' ( ) * : - // digits - // alpha (uppercase) - // alpha (lowercase) - if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E || - (c >= 0x27 && c <= 0x2A) || - (c >= 0x30 && c <= 0x3A) || - (c >= 0x41 && c <= 0x5A) || - (c >= 0x61 && c <= 0x7A)) { - continue; - } - - if (i - lastPos > 0) - out += str.slice(lastPos, i); - - lastPos = i + 1; - - // Other ASCII characters + // ASCII if (c < 0x80) { + if (noEscapeAuth[c] === 1) + continue; + if (lastPos < i) + out += str.slice(lastPos, i); + lastPos = i + 1; out += hexTable[c]; continue; } + if (lastPos < i) + out += str.slice(lastPos, i); + // Multi-byte characters ... if (c < 0x800) { + lastPos = i + 1; out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; continue; } if (c < 0xD800 || c >= 0xE000) { + lastPos = i + 1; out += hexTable[0xE0 | (c >> 12)] + hexTable[0x80 | ((c >> 6) & 0x3F)] + hexTable[0x80 | (c & 0x3F)]; @@ -998,6 +1007,7 @@ function encodeAuth(str) { c2 = str.charCodeAt(i) & 0x3FF; else c2 = 0; + lastPos = i + 1; c = 0x10000 + (((c & 0x3FF) << 10) | c2); out += hexTable[0xF0 | (c >> 18)] + hexTable[0x80 | ((c >> 12) & 0x3F)] + diff --git a/test/parallel/test-url.js b/test/parallel/test-url.js index fec2233dc0fac9..5a5fd476ebfb05 100644 --- a/test/parallel/test-url.js +++ b/test/parallel/test-url.js @@ -891,8 +891,19 @@ var parseTests = { pathname: '/*', path: '/*', href: 'https:///*' - } + }, + // surrogate in auth + 'http://%F0%9F%98%80@www.example.com/': { + href: 'http://%F0%9F%98%80@www.example.com/', + slashes: true, + protocol: 'http:', + auth: '\uD83D\uDE00', + host: 'www.example.com', + hostname: 'www.example.com', + pathname: '/', + path: '/' + } }; for (const u in parseTests) {