diff --git a/core/scripts/i18n/collect-strings.js b/core/scripts/i18n/collect-strings.js index 2c5ac8a5d695..998ad88856e6 100644 --- a/core/scripts/i18n/collect-strings.js +++ b/core/scripts/i18n/collect-strings.js @@ -14,7 +14,7 @@ import {pathToFileURL} from 'url'; import glob from 'glob'; import {expect} from 'expect'; import tsc from 'typescript'; -import MessageParser from 'intl-messageformat-parser'; +import MessageParser from '@formatjs/icu-messageformat-parser'; import esMain from 'es-main'; import isDeepEqual from 'lodash/isEqual.js'; @@ -24,6 +24,7 @@ import {pruneObsoleteLhlMessages} from './prune-obsolete-lhl-messages.js'; import {countTranslatedMessages} from './count-translated.js'; import {LH_ROOT} from '../../../shared/root.js'; import {resolveModulePath} from '../../../shared/esm-utils.js'; +import {escapeIcuMessage} from '../../../shared/localization/format.js'; // Match declarations of UIStrings, terminating in either a `};\n` (very likely to always be right) // or `}\n\n` (allowing semicolon to be optional, but insisting on a double newline so that an @@ -189,36 +190,35 @@ function convertMessageToCtc(lhlMessage, examples = {}) { * @param {string} lhlMessage */ function _lhlValidityChecks(lhlMessage) { - let parsedMessage; + let parsedMessageElements; try { - parsedMessage = MessageParser.parse(lhlMessage); + parsedMessageElements = MessageParser.parse(escapeIcuMessage(lhlMessage), {ignoreTag: true}); } catch (err) { if (err.name !== 'SyntaxError') throw err; - // Improve the intl-messageformat-parser syntax error output. - /** @type {Array<{text: string}>} */ - const expected = err.expected; - const expectedStr = expected.map(exp => `'${exp.text}'`).join(', '); - throw new Error(`Did not find the expected syntax (one of ${expectedStr}) in message "${lhlMessage}"`); + throw new Error(`[${err.message}] Did not find the expected syntax in message: ${err.originalMessage}`); } - for (const element of parsedMessage.elements) { - if (element.type !== 'argumentElement' || !element.format) continue; - - if (element.format.type === 'pluralFormat' || element.format.type === 'selectFormat') { - // `plural`/`select` arguments can't have content before or after them. - // See http://userguide.icu-project.org/formatparse/messages#TOC-Complex-Argument-Types - // e.g. https://github.com/GoogleChrome/lighthouse/pull/11068#discussion_r451682796 - if (parsedMessage.elements.length > 1) { - throw new Error(`Content cannot appear outside plural or select ICU messages. Instead, repeat that content in each option (message: '${lhlMessage}')`); - } - - // Each option value must also be a valid lhlMessage. - for (const option of element.format.options) { - const optionStr = lhlMessage.slice(option.value.location.start.offset, option.value.location.end.offset); - _lhlValidityChecks(optionStr); + /** + * @param {MessageParser.MessageFormatElement[]} elements + */ + function validate(elements) { + for (const element of elements) { + if (element.type === MessageParser.TYPE.plural || element.type === MessageParser.TYPE.select) { + // `plural`/`select` arguments can't have content before or after them. + // See http://userguide.icu-project.org/formatparse/messages#TOC-Complex-Argument-Types + // e.g. https://github.com/GoogleChrome/lighthouse/pull/11068#discussion_r451682796 + if (elements.length > 1) { + throw new Error(`Content cannot appear outside plural or select ICU messages. Instead, repeat that content in each option (message: '${lhlMessage}')`); + } + + for (const option of Object.values(element.options)) { + validate(option.value); + } } } } + + validate(parsedMessageElements); } /** @@ -389,7 +389,7 @@ function _processPlaceholderDirectIcu(icu, examples) { for (const [key, value] of Object.entries(examples)) { // Make sure all examples have ICU vars if (!icu.message.includes(`{${key}}`)) { - throw Error(`Example '${key}' provided, but has not corresponding ICU replacement in message "${icu.message}"`); + throw Error(`Example '${key}' provided, but has no corresponding ICU replacement in message "${icu.message}"`); } const eName = `ICU_${idx++}`; tempMessage = tempMessage.replace(`{${key}}`, `$${eName}$`); @@ -517,7 +517,7 @@ function parseUIStrings(sourceStr, liveUIStrings) { const key = getIdentifier(property); // Use live message to avoid having to e.g. concat strings broken into parts. - const message = liveUIStrings[key]; + const message = (liveUIStrings[key]); // @ts-expect-error - Not part of the public tsc interface yet. const jsDocComments = tsc.getJSDocCommentsAndTags(property); diff --git a/core/scripts/i18n/prune-obsolete-lhl-messages.js b/core/scripts/i18n/prune-obsolete-lhl-messages.js index e34eb42f8ded..6b10110919f1 100644 --- a/core/scripts/i18n/prune-obsolete-lhl-messages.js +++ b/core/scripts/i18n/prune-obsolete-lhl-messages.js @@ -8,9 +8,9 @@ import fs from 'fs'; import path from 'path'; import glob from 'glob'; -import MessageParser from 'intl-messageformat-parser'; +import MessageParser from '@formatjs/icu-messageformat-parser'; -import {collectAllCustomElementsFromICU} from '../../../shared/localization/format.js'; +import {collectAllCustomElementsFromICU, escapeIcuMessage} from '../../../shared/localization/format.js'; import {LH_ROOT} from '../../../shared/root.js'; import {readJson} from '../../test/test-utils.js'; @@ -24,8 +24,10 @@ import {readJson} from '../../test/test-utils.js'; * @return {boolean} */ function equalArguments(goldenArgumentIds, lhlMessage) { - const parsedMessage = MessageParser.parse(lhlMessage); - const lhlArgumentElements = collectAllCustomElementsFromICU(parsedMessage.elements); + const parsedMessageElements = MessageParser.parse(escapeIcuMessage(lhlMessage), { + ignoreTag: true, + }); + const lhlArgumentElements = collectAllCustomElementsFromICU(parsedMessageElements); const lhlArgumentIds = [...lhlArgumentElements.keys()]; if (goldenArgumentIds.length !== lhlArgumentIds.length) return false; @@ -96,8 +98,8 @@ function getGoldenLocaleArgumentIds(goldenLhl) { const goldenLocaleArgumentIds = {}; for (const [messageId, {message}] of Object.entries(goldenLhl)) { - const parsedMessage = MessageParser.parse(message); - const goldenArgumentElements = collectAllCustomElementsFromICU(parsedMessage.elements); + const parsedMessageElements = MessageParser.parse(escapeIcuMessage(message), {ignoreTag: true}); + const goldenArgumentElements = collectAllCustomElementsFromICU(parsedMessageElements); const goldenArgumentIds = [...goldenArgumentElements.keys()].sort(); goldenLocaleArgumentIds[messageId] = goldenArgumentIds; diff --git a/core/test/scripts/i18n/collect-strings-test.js b/core/test/scripts/i18n/collect-strings-test.js index 12148ab09c71..71f62cf81460 100644 --- a/core/test/scripts/i18n/collect-strings-test.js +++ b/core/test/scripts/i18n/collect-strings-test.js @@ -337,7 +337,7 @@ describe('#_lhlValidityChecks', () => { it('errors when using non-supported custom-formatted ICU format', () => { const message = 'Hello World took {var, badFormat, milliseconds}.'; expect(() => collect.convertMessageToCtc(message)).toThrow( - /Did not find the expected syntax \(one of 'number', 'date', 'time', 'plural', 'selectordinal', 'select'\) in message "Hello World took {var, badFormat, milliseconds}."$/); + /\[INVALID_ARGUMENT_TYPE\] Did not find the expected syntax in message: Hello World took {var, badFormat, milliseconds}.$/); }); it('errors when there is content outside of a plural argument', () => { @@ -370,14 +370,14 @@ describe('#_lhlValidityChecks', () => { /Content cannot appear outside plural or select ICU messages.*=1 {1 request} other {# requests}}'\)$/); }); - it('errors when there is content outside of nested plural aguments', () => { + it('errors when there is content outside of nested plural arguments', () => { const message = `{user_gender, select, female {Ms. {name} received {count, plural, =1 {one award.} other {# awards.}}} male {Mr. {name} received {count, plural, =1 {one award.} other {# awards.}}} other {{name} received {count, plural, =1 {one award.} other {# awards.}}} }`; expect(() => collect.convertMessageToCtc(message, {name: 'Elbert'})).toThrow( - /Content cannot appear outside plural or select ICU messages.*\(message: 'Ms. {name} received {count, plural, =1 {one award.} other {# awards.}}'\)$/); + /Content cannot appear outside plural or select ICU messages.*\(message: '{user_gender, select/); }); /* eslint-enable max-len */ }); @@ -562,7 +562,7 @@ describe('Convert Message to Placeholder', () => { const message = 'Hello name.'; expect(() => collect.convertMessageToCtc(message, {name: 'Mary'})) // eslint-disable-next-line max-len - .toThrow(/Example 'name' provided, but has not corresponding ICU replacement in message "Hello name."/); + .toThrow(/Example 'name' provided, but has no corresponding ICU replacement in message "Hello name."/); }); it('errors when direct ICU has no examples', () => { diff --git a/package.json b/package.json index ab6c97d6fd67..d56476c5147c 100644 --- a/package.json +++ b/package.json @@ -98,6 +98,7 @@ }, "devDependencies": { "@build-tracker/cli": "^1.0.0-beta.15", + "@formatjs/icu-messageformat-parser": "^2.6.2", "@esbuild-kit/esm-loader": "^2.1.1", "@esbuild-plugins/node-modules-polyfill": "^0.1.4", "@jest/fake-timers": "^28.1.0", @@ -152,7 +153,6 @@ "gh-pages": "^2.0.1", "glob": "^7.1.3", "idb-keyval": "2.2.0", - "intl-messageformat-parser": "^1.8.1", "jest-mock": "^27.3.0", "jest-snapshot": "^28.1.0", "jsdom": "^12.2.0", @@ -188,7 +188,7 @@ "devtools-protocol": "0.0.1200039", "enquirer": "^2.3.6", "http-link-header": "^1.1.1", - "intl-messageformat": "^4.4.0", + "intl-messageformat": "^10.5.3", "jpeg-js": "^0.4.4", "js-library-detector": "^6.7.0", "lighthouse-logger": "^2.0.1", diff --git a/shared/localization/format.js b/shared/localization/format.js index 829777de796a..389d27b3c90a 100644 --- a/shared/localization/format.js +++ b/shared/localization/format.js @@ -12,6 +12,19 @@ import {getModuleDirectory} from '../esm-utils.js'; import {isObjectOfUnknownValues, isObjectOrArrayOfUnknownValues} from '../type-verifiers.js'; import {locales} from './locales.js'; +// From @formatjs/icu-messageformat-parser - copy here so we don't need to bundle all that. +const TYPE = /** @type {const} */ ({ + literal: 0, + argument: 1, + number: 2, + date: 3, + time: 4, + select: 5, + plural: 6, + pound: 7, + tag: 8, +}); + const moduleDir = getModuleDirectory(import.meta); /** Contains available locales with messages. May be an empty object if bundled. */ @@ -30,11 +43,11 @@ const CANONICAL_LOCALES = fs.readdirSync(moduleDir + '/locales/') .map(locale => locale.replace('.json', '')) .sort(); -/** @typedef {import('intl-messageformat-parser').Element} MessageElement */ -/** @typedef {import('intl-messageformat-parser').ArgumentElement} ArgumentElement */ +/** @typedef {import('@formatjs/icu-messageformat-parser').MessageFormatElement} MessageFormatElement */ const MESSAGE_I18N_ID_REGEX = / | [^\s]+$/; +/** @type {Partial} */ const formats = { number: { bytes: { @@ -57,40 +70,39 @@ const formats = { }; /** - * Function to retrieve all 'argumentElement's from an ICU message. An argumentElement - * is an ICU element with an argument in it, like '{varName}' or '{varName, number, bytes}'. This - * differs from 'messageElement's which are just arbitrary text in a message. + * Function to retrieve all elements from an ICU message AST that are associated + * with a named input, like '{varName}' or '{varName, number, bytes}'. This + * differs from literal message types which are just arbitrary text. * - * Notes: - * This function will recursively inspect plural elements for nested argumentElements. + * This function recursively inspects plural elements for nested elements, + * but since the output is a Map they are deduplicated. + * e.g. "=1{hello {icu}} =other{hello {icu}}" will produce one element in the output, + * with "icu" as its key. * - * We need to find all the elements from the plural format sections, but - * they need to be deduplicated. I.e. "=1{hello {icu}} =other{hello {icu}}" - * the variable "icu" would appear twice if it wasn't de duplicated. And they cannot - * be stored in a set because they are not equal since their locations are different, - * thus they are stored via a Map keyed on the "id" which is the ICU varName. + * TODO: don't do that deduplication because messages within a plural message could be number + * messages with different styles. * - * @param {Array} icuElements - * @param {Map} [seenElementsById] - * @return {Map} + * @param {Array} icuElements + * @param {Map} [customElements] + * @return {Map} */ -function collectAllCustomElementsFromICU(icuElements, seenElementsById = new Map()) { +function collectAllCustomElementsFromICU(icuElements, customElements = new Map()) { for (const el of icuElements) { - // We are only interested in elements that need ICU formatting (argumentElements) - if (el.type !== 'argumentElement') continue; + if (el.type === TYPE.literal || el.type === TYPE.pound) continue; - seenElementsById.set(el.id, el); + customElements.set(el.value, el); // Plurals need to be inspected recursively - if (!el.format || el.format.type !== 'pluralFormat') continue; - // Look at all options of the plural (=1{} =other{}...) - for (const option of el.format.options) { - // Run collections on each option's elements - collectAllCustomElementsFromICU(option.value.elements, seenElementsById); + if (el.type === TYPE.plural) { + // Look at all options of the plural (=1{} =other{}...) + for (const option of Object.values(el.options)) { + // Run collections on each option's elements + collectAllCustomElementsFromICU(option.value, customElements); + } } } - return seenElementsById; + return customElements; } /** @@ -103,15 +115,14 @@ function collectAllCustomElementsFromICU(icuElements, seenElementsById = new Map * @return {Record} */ function _preformatValues(messageFormatter, values = {}, lhlMessage) { - const elementMap = collectAllCustomElementsFromICU(messageFormatter.getAst().elements); - const argumentElements = [...elementMap.values()]; + const customElements = collectAllCustomElementsFromICU(messageFormatter.getAst()); /** @type {Record} */ const formattedValues = {}; - for (const {id, format} of argumentElements) { + for (const [id, element] of customElements) { // Throw an error if a message's value isn't provided - if (id && (id in values) === false) { + if (!(id in values)) { throw new Error(`ICU Message "${lhlMessage}" contains a value reference ("${id}") ` + `that wasn't provided`); } @@ -119,7 +130,7 @@ function _preformatValues(messageFormatter, values = {}, lhlMessage) { const value = values[id]; // Direct `{id}` replacement and non-numeric values need no formatting. - if (!format || format.type !== 'numberFormat') { + if (element.type !== TYPE.number) { formattedValues[id] = value; continue; } @@ -130,13 +141,13 @@ function _preformatValues(messageFormatter, values = {}, lhlMessage) { } // Format values for known styles. - if (format.style === 'milliseconds') { + if (element.style === 'milliseconds') { // Round all milliseconds to the nearest 10. formattedValues[id] = Math.round(value / 10) * 10; - } else if (format.style === 'seconds' && id === 'timeInMs') { + } else if (element.style === 'seconds' && id === 'timeInMs') { // Convert all seconds to the correct unit (currently only for `timeInMs`). formattedValues[id] = Math.round(value / 100) / 10; - } else if (format.style === 'bytes') { + } else if (element.style === 'bytes') { // Replace all the bytes with KB. formattedValues[id] = value / 1024; } else { @@ -162,6 +173,19 @@ function _preformatValues(messageFormatter, values = {}, lhlMessage) { return formattedValues; } +/** + * Our strings were made when \ was the escape character, but now it is '. To avoid churn, + * let's convert to the new style in memory. + * @param {string} message + * @return {string} + */ +function escapeIcuMessage(message) { + return message + .replace(/'/g, `''`) + .replace(/\\{/g, `'{`) + .replace(/\\}/g, `'}`); +} + /** * Format string `message` by localizing `values` and inserting them. `message` * is assumed to already be in the given locale. @@ -172,19 +196,31 @@ function _preformatValues(messageFormatter, values = {}, lhlMessage) { * @return {string} */ function formatMessage(message, values, locale) { + message = escapeIcuMessage(message); + // Parsing and formatting can be slow. Don't attempt if the string can't // contain ICU placeholders, in which case formatting is already complete. - if (!message.includes('{') && values === undefined) return message; // When using accented english, force the use of a different locale for number formatting. const localeForMessageFormat = (locale === 'en-XA' || locale === 'en-XL') ? 'de-DE' : locale; - const formatter = new IntlMessageFormat(message, localeForMessageFormat, formats); + // This package is not correctly bundled. + /** @type {typeof IntlMessageFormat} */ + // @ts-expect-error bundler woes + const IntlMessageFormatCtor = IntlMessageFormat.IntlMessageFormat || IntlMessageFormat; + const formatter = new IntlMessageFormatCtor(message, localeForMessageFormat, formats, { + ignoreTag: true, + }); // Preformat values for the message format like KB and milliseconds. const valuesForMessageFormat = _preformatValues(formatter, values, message); - return formatter.format(valuesForMessageFormat); + const formattedResult = formatter.format(valuesForMessageFormat); + // We only format to strings. + if (typeof formattedResult !== 'string') { + throw new Error('unexpected formatted result'); + } + return formattedResult; } /** @@ -459,4 +495,5 @@ export { getIcuMessageIdParts, getAvailableLocales, getCanonicalLocales, + escapeIcuMessage, }; diff --git a/yarn.lock b/yarn.lock index 7da2e57babb9..f7e74f05b3fa 100644 --- a/yarn.lock +++ b/yarn.lock @@ -831,6 +831,45 @@ resolved "https://registry.yarnpkg.com/@firebase/webchannel-wrapper/-/webchannel-wrapper-0.5.1.tgz#a64d1af3c62e3bb89576ec58af880980a562bf4e" integrity sha512-dZMzN0uAjwJXWYYAcnxIwXqRTZw3o14hGe7O6uhwjD1ZQWPVYA5lASgnNskEBra0knVBsOXB4KXg+HnlKewN/A== +"@formatjs/ecma402-abstract@1.17.2": + version "1.17.2" + resolved "https://registry.yarnpkg.com/@formatjs/ecma402-abstract/-/ecma402-abstract-1.17.2.tgz#d197c6e26b9fd96ff7ba3b3a0cc2f25f1f2dcac3" + integrity sha512-k2mTh0m+IV1HRdU0xXM617tSQTi53tVR2muvYOsBeYcUgEAyxV1FOC7Qj279th3fBVQ+Dj6muvNJZcHSPNdbKg== + dependencies: + "@formatjs/intl-localematcher" "0.4.2" + tslib "^2.4.0" + +"@formatjs/fast-memoize@2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@formatjs/fast-memoize/-/fast-memoize-2.2.0.tgz#33bd616d2e486c3e8ef4e68c99648c196887802b" + integrity sha512-hnk/nY8FyrL5YxwP9e4r9dqeM6cAbo8PeU9UjyXojZMNvVad2Z06FAVHyR3Ecw6fza+0GH7vdJgiKIVXTMbSBA== + dependencies: + tslib "^2.4.0" + +"@formatjs/icu-messageformat-parser@2.6.2", "@formatjs/icu-messageformat-parser@^2.6.2": + version "2.6.2" + resolved "https://registry.yarnpkg.com/@formatjs/icu-messageformat-parser/-/icu-messageformat-parser-2.6.2.tgz#9bbb29099416e4ce2c7df50029c48985d4f901b3" + integrity sha512-nF/Iww7sc5h+1MBCDRm68qpHTCG4xvGzYs/x9HFcDETSGScaJ1Fcadk5U/NXjXeCtzD+DhN4BAwKFVclHfKMdA== + dependencies: + "@formatjs/ecma402-abstract" "1.17.2" + "@formatjs/icu-skeleton-parser" "1.6.2" + tslib "^2.4.0" + +"@formatjs/icu-skeleton-parser@1.6.2": + version "1.6.2" + resolved "https://registry.yarnpkg.com/@formatjs/icu-skeleton-parser/-/icu-skeleton-parser-1.6.2.tgz#00303034dc08583973c8aa67b96534c49c0bad8d" + integrity sha512-VtB9Slo4ZL6QgtDFJ8Injvscf0xiDd4bIV93SOJTBjUF4xe2nAWOoSjLEtqIG+hlIs1sNrVKAaFo3nuTI4r5ZA== + dependencies: + "@formatjs/ecma402-abstract" "1.17.2" + tslib "^2.4.0" + +"@formatjs/intl-localematcher@0.4.2": + version "0.4.2" + resolved "https://registry.yarnpkg.com/@formatjs/intl-localematcher/-/intl-localematcher-0.4.2.tgz#7e6e596dbaf2f0c5a7c22da5a01d5c55f4c37e9a" + integrity sha512-BGdtJFmaNJy5An/Zan4OId/yR9Ih1OojFjcduX/xOvq798OgWSyDtd6Qd5jqJXwJs1ipe4Fxu9+cshic5Ox2tA== + dependencies: + tslib "^2.4.0" + "@grpc/grpc-js@^1.3.2": version "1.3.7" resolved "https://registry.yarnpkg.com/@grpc/grpc-js/-/grpc-js-1.3.7.tgz#58b687aff93b743aafde237fd2ee9a3259d7f2d8" @@ -4271,7 +4310,17 @@ intl-messageformat-parser@^1.8.1: resolved "https://registry.yarnpkg.com/intl-messageformat-parser/-/intl-messageformat-parser-1.8.1.tgz#0eb14c5618333be4c95c409457b66c8c33ddcc01" integrity sha512-IMSCKVf0USrM/959vj3xac7s8f87sc+80Y/ipBzdKy4ifBv5Gsj2tZ41EAaURVg01QU71fYr77uA8Meh6kELbg== -intl-messageformat@^4.1.2, intl-messageformat@^4.4.0: +intl-messageformat@^10.5.3: + version "10.5.3" + resolved "https://registry.yarnpkg.com/intl-messageformat/-/intl-messageformat-10.5.3.tgz#db0779d4a1988faa2977d76574489b7a25f0d5d0" + integrity sha512-TzKn1uhJBMyuKTO4zUX47SU+d66fu1W9tVzIiZrQ6hBqQQeYscBMIzKL/qEXnFbJrH9uU5VV3+T5fWib4SIcKA== + dependencies: + "@formatjs/ecma402-abstract" "1.17.2" + "@formatjs/fast-memoize" "2.2.0" + "@formatjs/icu-messageformat-parser" "2.6.2" + tslib "^2.4.0" + +intl-messageformat@^4.1.2: version "4.4.0" resolved "https://registry.yarnpkg.com/intl-messageformat/-/intl-messageformat-4.4.0.tgz#aa196a4d04b573f4090bc417f982d81de4f74fad" integrity sha512-z+Bj2rS3LZSYU4+sNitdHrwnBhr0wO80ZJSW8EzKDBowwUe3Q/UsvgCGjrwa+HPzoGCLEb9HAjfJgo4j2Sac8w== @@ -7000,6 +7049,11 @@ tslib@^2.0.1, tslib@^2.1.0: resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.5.3.tgz#24944ba2d990940e6e982c4bea147aba80209913" integrity sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w== +tslib@^2.4.0: + version "2.6.2" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" + integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== + tsutils@^3.21.0: version "3.21.0" resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"