-
Notifications
You must be signed in to change notification settings - Fork 9.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
misc: refactor collect-strings.js to use glob and work on windows #9406
Changes from 14 commits
70088e1
5940f4f
5b7f6ac
aa2de69
8530cc8
2986d64
6d30674
3d6b326
c55a900
8d74620
8ef266e
3b778bf
baf44d7
726af3b
8cb8b59
3e3a5f5
c33a1ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
/* eslint-disable no-console, max-len */ | ||
|
||
const fs = require('fs'); | ||
const glob = require('glob'); | ||
const path = require('path'); | ||
const assert = require('assert'); | ||
const esprima = require('esprima'); | ||
|
@@ -20,15 +21,14 @@ const UISTRINGS_REGEX = /UIStrings = (.|\s)*?\};\n/im; | |
/** @typedef {import('./bake-ctc-to-lhl.js').ICUMessageDefn} ICUMessageDefn */ | ||
|
||
const ignoredPathComponents = [ | ||
'/.git', | ||
'/scripts', | ||
'/node_modules', | ||
'/test/', | ||
'-test.js', | ||
'-renderer.js', | ||
'**/.git/**', | ||
'**/scripts/**', | ||
'**/node_modules/**', | ||
'**/test/**', | ||
'**/*-test.js', | ||
'**/*-renderer.js', | ||
]; | ||
|
||
|
||
// @ts-ignore - @types/esprima lacks all of these | ||
function computeDescription(ast, property, value, startRange) { | ||
const endRange = property.range[0]; | ||
|
@@ -113,13 +113,9 @@ function convertMessageToCtc(message, examples = {}) { | |
|
||
// Process each placeholder type | ||
_processPlaceholderMarkdownCode(icuDefn); | ||
|
||
_processPlaceholderMarkdownLink(icuDefn); | ||
|
||
_processPlaceholderCustomFormattedIcu(icuDefn); | ||
|
||
_processPlaceholderDirectIcu(icuDefn, examples); | ||
|
||
_ctcSanityChecks(icuDefn); | ||
|
||
if (Object.entries(icuDefn.placeholders).length === 0) { | ||
|
@@ -328,6 +324,7 @@ function _ctcSanityChecks(icu) { | |
* messages themselves while leaving placeholders untouched. | ||
* | ||
* @param {Record<string, ICUMessageDefn>} messages | ||
* @return {Record<string, ICUMessageDefn>} | ||
*/ | ||
function createPsuedoLocaleStrings(messages) { | ||
/** @type {Record<string, ICUMessageDefn>} */ | ||
|
@@ -382,82 +379,77 @@ const seenStrings = new Map(); | |
let collisions = 0; | ||
|
||
/** | ||
* @param {string} dir | ||
* @param {Record<string, ICUMessageDefn>} strings | ||
* @param {string} dir absolute path | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment on this, something like
|
||
* @return {Record<string, ICUMessageDefn>} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea with the returning, I like this better! |
||
*/ | ||
function collectAllStringsInDir(dir, strings = {}) { | ||
for (const name of fs.readdirSync(dir)) { | ||
const fullPath = path.join(dir, name); | ||
const relativePath = path.relative(LH_ROOT, fullPath); | ||
if (ignoredPathComponents.some(p => fullPath.includes(p))) continue; | ||
|
||
if (fs.statSync(fullPath).isDirectory()) { | ||
collectAllStringsInDir(fullPath, strings); | ||
} else { | ||
if (name.endsWith('.js')) { | ||
if (!process.env.CI) console.log('Collecting from', relativePath); | ||
const content = fs.readFileSync(fullPath, 'utf8'); | ||
const exportVars = require(fullPath); | ||
const regexMatches = !!UISTRINGS_REGEX.test(content); | ||
const exportsUIStrings = !!exportVars.UIStrings; | ||
if (!regexMatches && !exportsUIStrings) continue; | ||
|
||
if (regexMatches && !exportsUIStrings) { | ||
throw new Error('UIStrings defined but not exported'); | ||
} | ||
|
||
if (exportsUIStrings && !regexMatches) { | ||
throw new Error('UIStrings exported but no definition found'); | ||
} | ||
|
||
// @ts-ignore regex just matched | ||
const justUIStrings = 'const ' + content.match(UISTRINGS_REGEX)[0]; | ||
// just parse the UIStrings substring to avoid ES version issues, save time, etc | ||
// @ts-ignore - esprima's type definition is supremely lacking | ||
const ast = esprima.parse(justUIStrings, {comment: true, range: true}); | ||
|
||
for (const stmt of ast.body) { | ||
if (stmt.type !== 'VariableDeclaration') continue; | ||
if (stmt.declarations[0].id.name !== 'UIStrings') continue; | ||
|
||
let lastPropertyEndIndex = 0; | ||
for (const property of stmt.declarations[0].init.properties) { | ||
const key = property.key.name; | ||
const val = exportVars.UIStrings[key]; | ||
const {description, examples} = computeDescription(ast, property, val, lastPropertyEndIndex); | ||
|
||
const converted = convertMessageToCtc(val, examples); | ||
|
||
const messageKey = `${relativePath} | ${key}`; | ||
|
||
/** @type {ICUMessageDefn} */ | ||
const icuDefn = { | ||
message: converted.message, | ||
description, | ||
placeholders: converted.placeholders, | ||
}; | ||
|
||
// check for duplicates, if duplicate, add @description as @meaning to both | ||
if (seenStrings.has(icuDefn.message)) { | ||
icuDefn.meaning = icuDefn.description; | ||
const seenId = seenStrings.get(icuDefn.message); | ||
if (seenId) { | ||
if (!strings[seenId].meaning) { | ||
strings[seenId].meaning = strings[seenId].description; | ||
collisions++; | ||
} | ||
collisions++; | ||
} | ||
} | ||
|
||
seenStrings.set(icuDefn.message, messageKey); | ||
function collectAllStringsInDir(dir) { | ||
/** @type {Record<string, ICUMessageDefn>} */ | ||
const strings = {}; | ||
|
||
const globPattern = path.join(path.relative(LH_ROOT, dir), '/**/*.js'); | ||
const files = glob.sync(globPattern, { | ||
cwd: LH_ROOT, | ||
ignore: ignoredPathComponents, | ||
}); | ||
for (const relativeToRootPath of files) { | ||
const absolutePath = path.join(LH_ROOT, relativeToRootPath); | ||
if (!process.env.CI) console.log('Collecting from', relativeToRootPath); | ||
|
||
const content = fs.readFileSync(absolutePath, 'utf8'); | ||
const exportVars = require(absolutePath); | ||
const regexMatches = UISTRINGS_REGEX.test(content); | ||
const exportsUIStrings = Boolean(exportVars.UIStrings); | ||
if (!regexMatches && !exportsUIStrings) continue; | ||
|
||
if (regexMatches && !exportsUIStrings) { | ||
throw new Error('UIStrings defined but not exported'); | ||
} | ||
|
||
strings[messageKey] = icuDefn; | ||
if (exportsUIStrings && !regexMatches) { | ||
throw new Error('UIStrings exported but no definition found'); | ||
} | ||
|
||
lastPropertyEndIndex = property.range[1]; | ||
// @ts-ignore regex just matched | ||
const justUIStrings = 'const ' + content.match(UISTRINGS_REGEX)[0]; | ||
// just parse the UIStrings substring to avoid ES version issues, save time, etc | ||
// @ts-ignore - esprima's type definition is supremely lacking | ||
const ast = esprima.parse(justUIStrings, {comment: true, range: true}); | ||
|
||
for (const stmt of ast.body) { | ||
if (stmt.type !== 'VariableDeclaration') continue; | ||
if (stmt.declarations[0].id.name !== 'UIStrings') continue; | ||
|
||
let lastPropertyEndIndex = 0; | ||
for (const property of stmt.declarations[0].init.properties) { | ||
const key = property.key.name; | ||
const val = exportVars.UIStrings[key]; | ||
const {description, examples} = computeDescription(ast, property, val, lastPropertyEndIndex); | ||
const converted = convertMessageToCtc(val, examples); | ||
const messageKey = `${relativeToRootPath} | ${key}`; | ||
|
||
/** @type {ICUMessageDefn} */ | ||
const icuDefn = { | ||
message: converted.message, | ||
description, | ||
placeholders: converted.placeholders, | ||
}; | ||
|
||
// check for duplicates, if duplicate, add @description as @meaning to both | ||
if (seenStrings.has(icuDefn.message)) { | ||
icuDefn.meaning = icuDefn.description; | ||
const seenId = seenStrings.get(icuDefn.message); | ||
if (seenId) { | ||
if (!strings[seenId].meaning) { | ||
strings[seenId].meaning = strings[seenId].description; | ||
collisions++; | ||
} | ||
collisions++; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 to @paulirish for keeping this global so this isn't a problem 😄 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry i have no idea what your comment is referencing but this line of code looks like i screwed up a merge? is it supposed to double count collisions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nvm looks good |
||
} | ||
} | ||
|
||
seenStrings.set(icuDefn.message, messageKey); | ||
strings[messageKey] = icuDefn; | ||
lastPropertyEndIndex = property.range[1]; | ||
} | ||
} | ||
} | ||
|
@@ -483,17 +475,18 @@ function writeStringsToCtcFiles(locale, strings) { | |
|
||
// @ts-ignore Test if called from the CLI or as a module. | ||
if (require.main === module) { | ||
const strings = collectAllStringsInDir(path.join(LH_ROOT, 'lighthouse-core')); | ||
const coreStrings = collectAllStringsInDir(path.join(LH_ROOT, 'lighthouse-core')); | ||
console.log('Collected from LH core!'); | ||
|
||
collectAllStringsInDir(path.join(LH_ROOT, 'stack-packs/packs'), strings); | ||
const stackPackStrings = collectAllStringsInDir(path.join(LH_ROOT, 'stack-packs/packs')); | ||
console.log('Collected from Stack Packs!'); | ||
|
||
if ((collisions) > 0) { | ||
assert.equal(collisions, 15, 'The number of duplicate strings have changed, update this assertion if that is expected, or reword strings.'); | ||
console.log(`MEANING COLLISION: ${collisions} string(s) have the same content.`); | ||
} | ||
|
||
const strings = {...coreStrings, ...stackPackStrings}; | ||
writeStringsToCtcFiles('en-US', strings); | ||
console.log('Written to disk!', 'en-US.ctc.json'); | ||
// Generate local pseudolocalized files for debugging while translating | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whitespace like this comes down to author preference and should really be discussed in the original PR rather than changed in the next commit (inviting style edit wars). Presumably @exterkamp preferred it and no one brought it up in the review, so unless there's a reason to change it, the default move should be to leave it unchanged.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 leave the spaces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok sorry bud