From 8452ccdf02853fb011a5f654f206a698a659889a Mon Sep 17 00:00:00 2001 From: Jeremy Stashewsky Date: Fri, 22 Sep 2017 14:53:19 -0700 Subject: [PATCH] Convert date-time parser from regexp, expand tests None of the regexps (at least, when they were removed) are vulnerable to ReDoS. However, took this opportunity to check that the RFC is being closer and more clearly documented where in the code. Another way to put this: "regexps are magic and hinder code analysis" Introduced some equivalence tests to ensure that certain "weird" dates are indeed parsing the same as their "canonical" RFC6265 counterpart. --- lib/cookie.js | 177 +++++++++++++++++++++++++++++++--------------- test/date_test.js | 75 ++++++++++++++++++++ 2 files changed, 194 insertions(+), 58 deletions(-) diff --git a/lib/cookie.js b/lib/cookie.js index bfd228d2..9f1afa18 100644 --- a/lib/cookie.js +++ b/lib/cookie.js @@ -62,14 +62,7 @@ var PATH_VALUE = /[\x20-\x3A\x3C-\x7E]+/; // date-time parsing constants (RFC6265 S5.1.1) var DATE_DELIM = /[\x09\x20-\x2F\x3B-\x40\x5B-\x60\x7B-\x7E]/; -var DAY_OF_MONTH = /^(\d{1,2})(?:[^\d]|$)/; -// S5.1.1 for "hms-time" -- is one or two digits each separated by : -// Cannot have non-digits beside the numbers like in other parts of the -// construction. -var TIME = /^(\d{1,2}):(\d{1,2}):(\d{1,2})(?:[^\d]|$)/; // only anchor at start - -var MONTH = /^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/i; var MONTH_TO_NUM = { jan:0, feb:1, mar:2, apr:3, may:4, jun:5, jul:6, aug:7, sep:8, oct:9, nov:10, dec:11 @@ -81,13 +74,80 @@ var NUM_TO_DAY = [ 'Sun','Mon','Tue','Wed','Thu','Fri','Sat' ]; -var YEAR = /^(\d{2}|\d{4})(?:[^\d]|$)/; // 2 or 4 digits, anchored at start - var MAX_TIME = 2147483647000; // 31-bit max var MIN_TIME = 0; // 31-bit min +/* + * Parses a Natural number (i.e., non-negative integer) with either the + * *DIGIT ( non-digit *OCTET ) + * or + * *DIGIT + * grammar (RFC6265 S5.1.1). + * + * The "trailingOK" boolean controls if the grammar accepts a + * "( non-digit *OCTET )" trailer. + */ +function parseDigits(token, minDigits, maxDigits, trailingOK) { + var count = 0; + while (count < token.length) { + var c = token.charCodeAt(count); + // "non-digit = %x00-2F / %x3A-FF" + if (c <= 0x2F || c >= 0x3A) { + break; + } + count++; + } + + // constrain to a minimum and maximum number of digits. + if (count < minDigits || count > maxDigits) { + return null; + } -// RFC6265 S5.1.1 date parser: + if (!trailingOK && count != token.length) { + return null; + } + + return parseInt(token.substr(0,count), 10); +} + +function parseTime(token) { + var parts = token.split(':'); + var result = [0,0,0]; + + /* RF6256 S5.1.1: + * time = hms-time ( non-digit *OCTET ) + * hms-time = time-field ":" time-field ":" time-field + * time-field = 1*2DIGIT + */ + + if (parts.length !== 3) { + return null; + } + + for (var i = 0; i < 3; i++) { + // "time-field" must be strictly "1*2DIGIT", HOWEVER, "hms-time" can be + // followed by "( non-digit *OCTET )" so therefore the last time-field can + // have a trailer + var trailingOK = (i == 2); + var num = parseDigits(parts[i], 1, 2, trailingOK); + if (num === null) { + return null; + } + result[i] = num; + } + + return result; +} + +function parseMonth(token) { + token = String(token).substr(0,3).toLowerCase(); + var num = MONTH_TO_NUM[token]; + return num >= 0 ? num : null; +} + +/* + * RFC6265 S5.1.1 date parser (see RFC for full grammar) + */ function parseDate(str) { if (!str) { return; @@ -103,9 +163,9 @@ function parseDate(str) { } var hour = null; - var minutes = null; - var seconds = null; - var day = null; + var minute = null; + var second = null; + var dayOfMonth = null; var month = null; var year = null; @@ -123,22 +183,12 @@ function parseDate(str) { * the date-token, respectively. Skip the remaining sub-steps and continue * to the next date-token. */ - if (seconds === null) { - result = TIME.exec(token); + if (second === null) { + result = parseTime(token); if (result) { - hour = parseInt(result[1], 10); - minutes = parseInt(result[2], 10); - seconds = parseInt(result[3], 10); - /* RFC6265 S5.1.1.5: - * [fail if] - * * the hour-value is greater than 23, - * * the minute-value is greater than 59, or - * * the second-value is greater than 59. - */ - if(hour > 23 || minutes > 59 || seconds > 59) { - return; - } - + hour = result[0]; + minute = result[1]; + second = result[2]; continue; } } @@ -148,16 +198,11 @@ function parseDate(str) { * the day-of-month-value to the number denoted by the date-token. Skip * the remaining sub-steps and continue to the next date-token. */ - if (day === null) { - result = DAY_OF_MONTH.exec(token); - if (result) { - day = parseInt(result[1], 10); - /* RFC6265 S5.1.1.5: - * [fail if] the day-of-month-value is less than 1 or greater than 31 - */ - if(day < 1 || day > 31) { - return; - } + if (dayOfMonth === null) { + // "day-of-month = 1*2DIGIT ( non-digit *OCTET )" + result = parseDigits(token, 1, 2, true); + if (result !== null) { + dayOfMonth = result; continue; } } @@ -168,47 +213,63 @@ function parseDate(str) { * continue to the next date-token. */ if (month === null) { - result = MONTH.exec(token); - if (result) { - month = MONTH_TO_NUM[result[1].toLowerCase()]; + result = parseMonth(token); + if (result !== null) { + month = result; continue; } } - /* 2.4. If the found-year flag is not set and the date-token matches the year - * production, set the found-year flag and set the year-value to the number - * denoted by the date-token. Skip the remaining sub-steps and continue to - * the next date-token. + /* 2.4. If the found-year flag is not set and the date-token matches the + * year production, set the found-year flag and set the year-value to the + * number denoted by the date-token. Skip the remaining sub-steps and + * continue to the next date-token. */ if (year === null) { - result = YEAR.exec(token); - if (result) { - year = parseInt(result[0], 10); + // "year = 2*4DIGIT ( non-digit *OCTET )" + result = parseDigits(token, 2, 4, true); + if (result !== null) { + year = result; /* From S5.1.1: * 3. If the year-value is greater than or equal to 70 and less * than or equal to 99, increment the year-value by 1900. * 4. If the year-value is greater than or equal to 0 and less * than or equal to 69, increment the year-value by 2000. */ - if (70 <= year && year <= 99) { + if (year >= 70 && year <= 99) { year += 1900; - } else if (0 <= year && year <= 69) { + } else if (year >= 0 && year <= 69) { year += 2000; } - - if (year < 1601) { - return; // 5. ... the year-value is less than 1601 - } } } } - if (seconds === null || day === null || month === null || year === null) { - return; // 5. ... at least one of the found-day-of-month, found-month, found- - // year, or found-time flags is not set, + /* RFC 6265 S5.1.1 + * "5. Abort these steps and fail to parse the cookie-date if: + * * at least one of the found-day-of-month, found-month, found- + * year, or found-time flags is not set, + * * the day-of-month-value is less than 1 or greater than 31, + * * the year-value is less than 1601, + * * the hour-value is greater than 23, + * * the minute-value is greater than 59, or + * * the second-value is greater than 59. + * (Note that leap seconds cannot be represented in this syntax.)" + * + * So, in order as above: + */ + if ( + dayOfMonth === null || month === null || year === null || second === null || + dayOfMonth < 1 || dayOfMonth > 31 || + year < 1601 || + hour > 23 || + minute > 59 || + second > 59 + ) { + return; } - return new Date(Date.UTC(year, month, day, hour, minutes, seconds)); + return new Date(Date.UTC(year, month, dayOfMonth, hour, minute, second)); } function formatDate(date) { diff --git a/test/date_test.js b/test/date_test.js index 39d0cef6..6efd30df 100644 --- a/test/date_test.js +++ b/test/date_test.js @@ -53,12 +53,30 @@ function dateVows(table) { return {"date parsing": theVows}; } +function equivalenceVows(table) { + var theVows = {}; + Object.keys(table).forEach(function (thisDate) { + var sameAs = table[thisDate]; + var label = "'"+thisDate+"' parses the same as '"+sameAs+"'"; + theVows[label] = function () { + var expected = tough.parseDate(sameAs); + var actual = tough.parseDate(thisDate); + if (!expected && !actual) { + assert.ok(false, "both dates failed to parse!"); + } + assert.equal(actual.toString(), expected.toString()); + }; + }); + return {"equivalence parsing": theVows}; +} + var TOO_MANY_XS = String("x").repeat(65535); vows .describe('Date') .addBatch(dateVows({ "Wed, 09 Jun 2021 10:18:14 GMT": true, + "Wed, 09 JUN 2021 10:18:14 GMT": true, "Wed, 09 Jun 2021 22:18:14 GMT": true, "Tue, 18 Oct 2011 07:42:42.123 GMT": true, "18 Oct 2011 07:42:42 GMT": true, @@ -90,6 +108,19 @@ vows "Thu, 01 Jan 1970 00:000:01 GMT": false, "Thu, 01 Jan 1970 00:00:010 GMT": false, + // hex in time + "Wed, 09 Jun 2021 1a:33:44 GMT": false, + "Wed, 09 Jun 2021 a1:33:44 GMT": false, + "Wed, 09 Jun 2021 11:f3:44 GMT": false, + "Wed, 09 Jun 2021 11:3f:44 GMT": false, + "Wed, 09 Jun 2021 11:33:e4 GMT": false, + "Wed, 09 Jun 2021 11:33:4e GMT": true, // garbage after seconds is OK + + // negatives in time + "Wed, 09 Jun 2021 -1:33:44 GMT": true, // parses as 1:33; - is a delimiter + "Wed, 09 Jun 2021 11:-3:44 GMT": false, + "Wed, 09 Jun 2021 11:33:-4 GMT": false, + "": false })) .addBatch({ @@ -121,4 +152,48 @@ vows } } }) + .addBatch(equivalenceVows({ + // milliseconds ignored + "Tue, 18 Oct 2011 07:42:42.123 GMT": "Tue, 18 Oct 2011 07:42:42 GMT", + + // shorter HH:MM:SS works how you'd expect: + "8 Oct 2011 7:32:42 GMT": "8 Oct 2011 07:32:42 GMT", + "8 Oct 2011 7:2:42 GMT": "8 Oct 2011 07:02:42 GMT", + "8 Oct 2011 7:2:2 GMT": "8 Oct 2011 07:02:02 GMT", + + // MDY versus DMY: + "Oct 18 2011 07:42:42 GMT": "18 Oct 2011 07:42:42 GMT", + + // some other messy auto format + "Tue Oct 18 2011 07:05:03 GMT+0000 (GMT)": "Tue, 18 Oct 2011 07:05:03 GMT", + + // short year + '10 Feb 81 13:00:00 GMT': '10 Feb 1981 13:00:00 GMT', + '10 Feb 17 13:00:00 GMT': '10 Feb 2017 13:00:00 GMT', + + // dashes + 'Thu, 17-Apr-2014 02:12:29 GMT': 'Thu, 17 Apr 2014 02:12:29 GMT', + // dashes and "UTC" (timezone is always ignored) + 'Thu, 17-Apr-2014 02:12:29 UTC': 'Thu, 17 Apr 2014 02:12:29 GMT', + + // no weekday + "09 Jun 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT", + + // garbage after seconds is OK + "Wed, 09 Jun 2021 11:33:4e GMT": "Wed, 09 Jun 2021 11:33:04 GMT", + + // - is delimiter in this position + "Wed, 09 Jun 2021 -1:33:44 GMT": "Wed, 09 Jun 2021 01:33:44 GMT", + + // prefix match on month + "Wed, 09 Junxxx 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT", + "09 November 2021 10:18:14 GMT": "09 Nov 2021 10:18:14 GMT", + + // case of Month + "Wed, 09 JUN 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT", + "Wed, 09 jUN 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT", + + // test the framework :wink: + "Wed, 09 Jun 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT" + })) .export(module);