Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

misc: refactor collect-strings.js to use glob and work on windows #9406

Merged
merged 17 commits into from
Jul 26, 2019
157 changes: 78 additions & 79 deletions lighthouse-core/scripts/i18n/collect-strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
/* eslint-disable no-console, max-len */

const fs = require('fs');
const glob = require('glob');
const path = require('path');
const assert = require('assert');
const esprima = require('esprima');
Expand All @@ -20,15 +21,14 @@ const UISTRINGS_REGEX = /UIStrings = (.|\s)*?\};\n/im;
/** @typedef {import('./bake-ctc-to-lhl.js').ICUMessageDefn} ICUMessageDefn */

const ignoredPathComponents = [
'/.git',
'/scripts',
'/node_modules',
'/test/',
'-test.js',
'-renderer.js',
'**/.git/**',
'**/scripts/**',
'**/node_modules/**',
'**/test/**',
'**/*-test.js',
'**/*-renderer.js',
];


// @ts-ignore - @types/esprima lacks all of these
function computeDescription(ast, property, value, startRange) {
const endRange = property.range[0];
Expand Down Expand Up @@ -328,6 +328,7 @@ function _ctcSanityChecks(icu) {
* messages themselves while leaving placeholders untouched.
*
* @param {Record<string, ICUMessageDefn>} messages
* @return {Record<string, ICUMessageDefn>}
*/
function createPsuedoLocaleStrings(messages) {
/** @type {Record<string, ICUMessageDefn>} */
Expand Down Expand Up @@ -382,82 +383,79 @@ const seenStrings = new Map();
let collisions = 0;

/**
* @param {string} dir
* @param {Record<string, ICUMessageDefn>} strings
* Collects all LHL messsages defined in UIString from Javascript files in dir,
* and converts them into CTC.
* @param {string} dir absolute path
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment on this, something like

 * Collects all lhl messsages defined in UIString vars from within Javascript files located in dir, 
 * and converts them into ctc format.

* @return {Record<string, ICUMessageDefn>}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea with the returning, I like this better!

*/
function collectAllStringsInDir(dir, strings = {}) {
for (const name of fs.readdirSync(dir)) {
const fullPath = path.join(dir, name);
const relativePath = path.relative(LH_ROOT, fullPath);
if (ignoredPathComponents.some(p => fullPath.includes(p))) continue;

if (fs.statSync(fullPath).isDirectory()) {
collectAllStringsInDir(fullPath, strings);
} else {
if (name.endsWith('.js')) {
if (!process.env.CI) console.log('Collecting from', relativePath);
const content = fs.readFileSync(fullPath, 'utf8');
const exportVars = require(fullPath);
const regexMatches = !!UISTRINGS_REGEX.test(content);
const exportsUIStrings = !!exportVars.UIStrings;
if (!regexMatches && !exportsUIStrings) continue;

if (regexMatches && !exportsUIStrings) {
throw new Error('UIStrings defined but not exported');
}

if (exportsUIStrings && !regexMatches) {
throw new Error('UIStrings exported but no definition found');
}

// @ts-ignore regex just matched
const justUIStrings = 'const ' + content.match(UISTRINGS_REGEX)[0];
// just parse the UIStrings substring to avoid ES version issues, save time, etc
// @ts-ignore - esprima's type definition is supremely lacking
const ast = esprima.parse(justUIStrings, {comment: true, range: true});

for (const stmt of ast.body) {
if (stmt.type !== 'VariableDeclaration') continue;
if (stmt.declarations[0].id.name !== 'UIStrings') continue;

let lastPropertyEndIndex = 0;
for (const property of stmt.declarations[0].init.properties) {
const key = property.key.name;
const val = exportVars.UIStrings[key];
const {description, examples} = computeDescription(ast, property, val, lastPropertyEndIndex);

const converted = convertMessageToCtc(val, examples);

const messageKey = `${relativePath} | ${key}`;

/** @type {ICUMessageDefn} */
const icuDefn = {
message: converted.message,
description,
placeholders: converted.placeholders,
};

// check for duplicates, if duplicate, add @description as @meaning to both
if (seenStrings.has(icuDefn.message)) {
icuDefn.meaning = icuDefn.description;
const seenId = seenStrings.get(icuDefn.message);
if (seenId) {
if (!strings[seenId].meaning) {
strings[seenId].meaning = strings[seenId].description;
collisions++;
}
collisions++;
}
}

seenStrings.set(icuDefn.message, messageKey);
function collectAllStringsInDir(dir) {
/** @type {Record<string, ICUMessageDefn>} */
const strings = {};

const globPattern = path.join(path.relative(LH_ROOT, dir), '/**/*.js');
const files = glob.sync(globPattern, {
cwd: LH_ROOT,
ignore: ignoredPathComponents,
});
for (const relativeToRootPath of files) {
const absolutePath = path.join(LH_ROOT, relativeToRootPath);
if (!process.env.CI) console.log('Collecting from', relativeToRootPath);

const content = fs.readFileSync(absolutePath, 'utf8');
const exportVars = require(absolutePath);
const regexMatches = UISTRINGS_REGEX.test(content);
const exportsUIStrings = Boolean(exportVars.UIStrings);
if (!regexMatches && !exportsUIStrings) continue;

if (regexMatches && !exportsUIStrings) {
throw new Error('UIStrings defined but not exported');
}

strings[messageKey] = icuDefn;
if (exportsUIStrings && !regexMatches) {
throw new Error('UIStrings exported but no definition found');
}

lastPropertyEndIndex = property.range[1];
// @ts-ignore regex just matched
const justUIStrings = 'const ' + content.match(UISTRINGS_REGEX)[0];
// just parse the UIStrings substring to avoid ES version issues, save time, etc
// @ts-ignore - esprima's type definition is supremely lacking
const ast = esprima.parse(justUIStrings, {comment: true, range: true});

for (const stmt of ast.body) {
if (stmt.type !== 'VariableDeclaration') continue;
if (stmt.declarations[0].id.name !== 'UIStrings') continue;

let lastPropertyEndIndex = 0;
for (const property of stmt.declarations[0].init.properties) {
const key = property.key.name;
const val = exportVars.UIStrings[key];
const {description, examples} = computeDescription(ast, property, val, lastPropertyEndIndex);
const converted = convertMessageToCtc(val, examples);
const messageKey = `${relativeToRootPath} | ${key}`;

/** @type {ICUMessageDefn} */
const icuDefn = {
message: converted.message,
description,
placeholders: converted.placeholders,
};

// check for duplicates, if duplicate, add @description as @meaning to both
if (seenStrings.has(icuDefn.message)) {
icuDefn.meaning = icuDefn.description;
const seenId = seenStrings.get(icuDefn.message);
if (seenId) {
if (!strings[seenId].meaning) {
strings[seenId].meaning = strings[seenId].description;
collisions++;
}
collisions++;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to @paulirish for keeping this global so this isn't a problem 😄

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry i have no idea what your comment is referencing but this line of code looks like i screwed up a merge?

is it supposed to double count collisions

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nvm looks good

}
}

seenStrings.set(icuDefn.message, messageKey);
strings[messageKey] = icuDefn;
lastPropertyEndIndex = property.range[1];
}
}
}
Expand All @@ -483,17 +481,18 @@ function writeStringsToCtcFiles(locale, strings) {

// @ts-ignore Test if called from the CLI or as a module.
if (require.main === module) {
const strings = collectAllStringsInDir(path.join(LH_ROOT, 'lighthouse-core'));
const coreStrings = collectAllStringsInDir(path.join(LH_ROOT, 'lighthouse-core'));
console.log('Collected from LH core!');

collectAllStringsInDir(path.join(LH_ROOT, 'stack-packs/packs'), strings);
const stackPackStrings = collectAllStringsInDir(path.join(LH_ROOT, 'stack-packs/packs'));
console.log('Collected from Stack Packs!');

if ((collisions) > 0) {
assert.equal(collisions, 15, 'The number of duplicate strings have changed, update this assertion if that is expected, or reword strings.');
console.log(`MEANING COLLISION: ${collisions} string(s) have the same content.`);
}

const strings = {...coreStrings, ...stackPackStrings};
writeStringsToCtcFiles('en-US', strings);
console.log('Written to disk!', 'en-US.ctc.json');
// Generate local pseudolocalized files for debugging while translating
Expand Down