From e7927220ef35a4de4d120427cb790d9d8346a4a4 Mon Sep 17 00:00:00 2001 From: Miki Date: Mon, 10 Jul 2023 22:52:09 -0700 Subject: [PATCH] Add serialization and deserialization of numerals larger than `Number.MAX_SAFE_INTEGER` Signed-off-by: Miki --- CHANGELOG.md | 3 +- lib/Serializer.js | 90 +++++++++++++++++++++++++++++++++++- test/unit/serializer.test.js | 21 +++++++++ 3 files changed, 111 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5d5e9bb5..3288a9f07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [Unreleased] ### Added +- Add serialization and deserialization of numerals larger than `Number.MAX_SAFE_INTEGER` ([#544](https://github.com/opensearch-project/opensearch-js/pull/544)) ### Dependencies - Bumps `prettier` from 2.8.7 to 2.8.8 - Bumps `ora` from 6.1.2 to 6.3.0 @@ -146,4 +147,4 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) [2.1.0]: https://github.com/opensearch-project/opensearch-js/releases/tag/2.1.0 [2.2.0]: https://github.com/opensearch-project/opensearch-js/releases/tag/2.2.0 [2.2.1]: https://github.com/opensearch-project/opensearch-js/releases/tag/2.2.1 -[Unreleased]: https://github.com/opensearch-project/opensearch-js/compare/2.2.1...HEAD \ No newline at end of file +[Unreleased]: https://github.com/opensearch-project/opensearch-js/compare/2.2.1...HEAD diff --git a/lib/Serializer.js b/lib/Serializer.js index 06bec7199..3ae5c5238 100644 --- a/lib/Serializer.js +++ b/lib/Serializer.js @@ -35,6 +35,77 @@ const sjson = require('secure-json-parse'); const { SerializationError, DeserializationError } = require('./errors'); const kJsonOptions = Symbol('secure json parse options'); +/* In JavaScript, a `Number` is a 64-bit floating-point value which can store 16 digits. 
However, the + * serializer and deserializer will need to cater to numeric values generated by other languages which + * can have up to 19 digits. Native JSON parser and stringifier, incapable of handling the extra + * digits, corrupt the values, making them unusable. + * + * To work around this limitation, the deserializer converts long sequences of digits into strings and + * marks them before applying the parser. During the parsing, string values that begin with the mark + * are converted to `BigInt` values. + * Similarly, during stringification, the serializer converts `BigInt` values to marked strings and + * when done, it replaces them with plain numerals. + * + * `Number.MAX_SAFE_INTEGER`, 9,007,199,254,740,991, is the largest integer that the native methods can + * parse and stringify without losing precision, and any numeral greater than that would need to be + * translated using the workaround; all numerals of 17 digits or longer, and only the tail end of the + * 16-digit range, need translation. It would be unfair to all the 16-digit numbers if the translation + * applied to `\d{16,}` only to cover the less than 10%. Hence, a RegExp is created to only match + * numerals too large to be safely held in a `Number`. + * + * To make the explanation simpler, let's assume that MAX_SAFE_INTEGER is 8921 which has 4 digits. + * Starting from the right, we take each digit in turn, keep the digits to its left unchanged, replace + * the digit itself with `[(digit + 1)-9]`, and allow any digits to its right: + * 1) 8922 - 8929: 892[2-9]\d{0} + * 2) 8930 - 8999: 89[3-9]\d{1} + * 9) 9 + 1 = 10 which results in a rollover; no need to do anything. + * 8) 9000 - 9999: [9-9]\d{3} + * Finally we add anything 5 digits or longer: `\d{5,}` + * + * PS, a better solution would use AST but considering its performance penalty, RegExp is the next + * best solution. 
+ */ +const maxIntAsString = String(Number.MAX_SAFE_INTEGER); +const maxIntLength = maxIntAsString.length; +// Sub-patterns for each digit +const bigIntMatcherTokens = [`\\d{${maxIntAsString.length + 1},}`]; +for (let i = 0; i < maxIntLength; i++) { + if (maxIntAsString[i] !== '9') { + bigIntMatcherTokens.push( + maxIntAsString.substring(0, i) + + `[${parseInt(maxIntAsString[i], 10) + 1}-9]` + + `\\d{${maxIntLength - i - 1}}` + ); + } +} + +/* The matcher that looks for `": , ...}` and `[..., , ...]` + * + * The pattern starts by looking for `":` not immediately preceded by a `\`. That should be + * followed by any of the numeric sub-patterns. A comma, end of an array, end of an object, or + * the end of the input are the only acceptable elements after it. + */ +const bigIntMatcher = new RegExp( + `(\\[|,|(?", ...}` and `[..., "", ...]` in previously marked numerals +const markedBigIntMatcher = new RegExp( + `(\\[|,|(? (typeof val === 'bigint' ? `${bigIntMark}${val.toString()}` : val) + ) + // Replace marked substrings with just the numerals + .replace(markedBigIntMatcher, markedBigIntResolver); } catch (err) { throw new SerializationError(err.message, object); } @@ -59,7 +136,16 @@ class Serializer { debug('Deserializing', json); let object; try { - object = sjson.parse(json, this[kJsonOptions]); + object = sjson.parse( + // Convert long numerals to strings and mark them + json.replace(bigIntMatcher, bigIntMarker), + (key, val) => + // Convert marked values to BigInt values + typeof val === 'string' && val.startsWith(bigIntMark) && bigIntMarkFinder.test(val) + ? 
BigInt(val.substring(bigIntMarkLength)) // eslint-disable-line no-undef + : val, + this[kJsonOptions] + ); } catch (err) { throw new DeserializationError(err.message, json); } diff --git a/test/unit/serializer.test.js b/test/unit/serializer.test.js index ccbb9baf2..bf97606f5 100644 --- a/test/unit/serializer.test.js +++ b/test/unit/serializer.test.js @@ -43,6 +43,27 @@ test('Basic', (t) => { t.same(s.deserialize(json), obj); }); +test('Long numerals', (t) => { + t.plan(5); + const s = new Serializer(); + const longPositive = BigInt(Number.MAX_SAFE_INTEGER) * 2n; // eslint-disable-line no-undef + const longNegative = BigInt(Number.MIN_SAFE_INTEGER) * 2n; // eslint-disable-line no-undef + const json = + `{` + + `"\\":${longPositive}": "NO-MATCH", ` + + `"positive": ${longPositive.toString()}, ` + + `"array": [ ${longNegative.toString()}, ${longPositive.toString()} ], ` + + `"negative": ${longNegative.toString()}` + + `}`; + const obj = s.deserialize(json); + const res = s.serialize(obj); + t.equal(obj.positive, longPositive); + t.equal(obj.negative, longNegative); + t.same(obj.array, [longNegative, longPositive]); + t.equal(obj['":' + longPositive], 'NO-MATCH'); + t.equal(res, json.replace(/\s+/g, '')); +}); + test('ndserialize', (t) => { t.plan(1); const s = new Serializer();