diff --git a/lighthouse-core/scripts/lantern/print-correlations.js b/lighthouse-core/scripts/lantern/print-correlations.js
index f0c758672507..6ad08d0fad1a 100755
--- a/lighthouse-core/scripts/lantern/print-correlations.js
+++ b/lighthouse-core/scripts/lantern/print-correlations.js
@@ -8,155 +8,31 @@
 /* eslint-disable no-console */
-const fs = require('fs');
-const path = require('path');
-const constants = require('./constants');
-
-const GOOD_ABSOLUTE_THRESHOLD = 0.2;
-const OK_ABSOLUTE_THRESHOLD = 0.5;
-
-const GOOD_RANK_THRESHOLD = 0.1;
-
-const INPUT_PATH = process.argv[2] || constants.SITE_INDEX_WITH_GOLDEN_WITH_COMPUTED_PATH;
-const COMPUTATIONS_PATH = path.resolve(process.cwd(), INPUT_PATH);
-
-if (!fs.existsSync(COMPUTATIONS_PATH)) throw new Error('Usage $0 ');
-
-/** @type {{sites: LanternSiteDefinition[]}} */
-const expectations = require(COMPUTATIONS_PATH);
-
-const entries = expectations.sites.filter(site => site.lantern);
-
-if (!entries.length) {
-  throw new Error('No lantern metrics available, did you run run-all-expectations.js');
-}
-
-/** @type {LanternEvaluation[]} */
-const totalGood = [];
-/** @type {LanternEvaluation[]} */
-const totalOk = [];
-/** @type {LanternEvaluation[]} */
-const totalBad = [];
-
-/**
- * @param {keyof TargetMetrics} metric
- * @param {keyof LanternMetrics} lanternMetric
- */
-function evaluateBuckets(metric, lanternMetric) {
-  const good = [];
-  const ok = [];
-  const bad = [];
-
-  // @ts-ignore
-  const sortedByMetric = entries.slice().sort((a, b) => a[metric] - b[metric]);
-  const sortedByLanternMetric = entries
-    .slice()
-    .sort((a, b) => a.lantern[lanternMetric] - b.lantern[lanternMetric]);
-
-  const rankErrors = [];
-  const percentErrors = [];
-  for (const entry of entries) {
-    const expected = Math.round(entry.wpt3g[metric]);
-    if (expected === 0) continue;
-
-    const expectedRank = sortedByMetric.indexOf(entry);
-    const actual = Math.round(entry.lantern[lanternMetric]);
-    const actualRank = sortedByLanternMetric.indexOf(entry);
-    const diff = Math.abs(actual - expected);
-    const diffAsPercent = diff / expected;
-    const rankDiff = Math.abs(expectedRank - actualRank);
-    const rankDiffAsPercent = rankDiff / entries.length;
-
-    rankErrors.push(rankDiffAsPercent);
-    percentErrors.push(diffAsPercent);
-    const evaluation = {...entry, expected, actual, diff, rankDiff, rankDiffAsPercent, metric};
-    if (diffAsPercent < GOOD_ABSOLUTE_THRESHOLD || rankDiffAsPercent < GOOD_RANK_THRESHOLD) {
-      good.push(evaluation);
-    } else if (diffAsPercent < OK_ABSOLUTE_THRESHOLD) {
-      ok.push(evaluation);
-    } else bad.push(evaluation);
-  }
-
-  if (lanternMetric.includes('roughEstimate')) {
-    totalGood.push(...good);
-    totalOk.push(...ok);
-    totalBad.push(...bad);
-  }
-
-  const MAPE = Math.round(percentErrors.reduce((x, y) => x + y) / percentErrors.length * 1000) / 10;
-  const rank = Math.round(rankErrors.reduce((x, y) => x + y) / rankErrors.length * 1000) / 10;
-  const buckets = `${good.length}/${ok.length}/${bad.length}`;
-  console.log(
-    metric.padEnd(30),
-    lanternMetric.padEnd(25),
-    `${rank}%`.padEnd(12),
-    `${MAPE}%`.padEnd(10),
-    buckets.padEnd(15)
-  );
-}
-
-console.log('---- Metric Stats ----');
-console.log(
-  'metric'.padEnd(30),
-  'estimate'.padEnd(25),
-  'rank error'.padEnd(12),
-  'MAPE'.padEnd(10),
-  'Good/OK/Bad'.padEnd(15)
-);
-evaluateBuckets('firstContentfulPaint', 'optimisticFCP');
-evaluateBuckets('firstContentfulPaint', 'pessimisticFCP');
-evaluateBuckets('firstContentfulPaint', 'roughEstimateOfFCP');
-
-evaluateBuckets('firstMeaningfulPaint', 'optimisticFMP');
-evaluateBuckets('firstMeaningfulPaint', 'pessimisticFMP');
-evaluateBuckets('firstMeaningfulPaint', 'roughEstimateOfFMP');
-
-evaluateBuckets('timeToFirstInteractive', 'optimisticTTFCPUI');
-evaluateBuckets('timeToFirstInteractive', 'pessimisticTTFCPUI');
-evaluateBuckets('timeToFirstInteractive', 'roughEstimateOfTTFCPUI');
-
-evaluateBuckets('timeToConsistentlyInteractive', 'optimisticTTI');
-evaluateBuckets('timeToConsistentlyInteractive', 'pessimisticTTI');
-evaluateBuckets('timeToConsistentlyInteractive', 'roughEstimateOfTTI');
-
-evaluateBuckets('speedIndex', 'optimisticSI');
-evaluateBuckets('speedIndex', 'pessimisticSI');
-evaluateBuckets('speedIndex', 'roughEstimateOfSI');
-
-const total = totalGood.length + totalOk.length + totalBad.length;
-console.log('\n---- Summary Stats ----');
-console.log(`Good: ${Math.round((totalGood.length / total) * 100)}%`);
-console.log(`OK: ${Math.round((totalOk.length / total) * 100)}%`);
-console.log(`Bad: ${Math.round((totalBad.length / total) * 100)}%`);
-
-console.log('\n---- Worst10 Sites ----');
-for (const entry of totalBad.sort((a, b) => b.rankDiff - a.rankDiff).slice(0, 10)) {
-  console.log(
-    entry.actual < entry.expected ? 'underestimated' : 'overestimated',
-    entry.metric,
-    'by',
-    Math.round(entry.diff),
-    'on',
-    entry.url
-  );
-}
-
 /**
  * @typedef LanternSiteDefinition
  * @property {string} url
  * @property {TargetMetrics} wpt3g
  * @property {LanternMetrics} lantern
+ * @property {LanternMetrics} baseline
  */
 
 /**
  * @typedef LanternEvaluation
  * @property {string} url
  * @property {string} metric
+ * @property {string} lanternMetric
  * @property {number} expected
  * @property {number} actual
  * @property {number} diff
- * @property {number} rankDiff
- * @property {number} rankDiffAsPercent
+ * @property {number} diffAsPercent
+ */
+
+/**
+ * @typedef EstimateEvaluationSummary
+ * @property {LanternEvaluation[]} evaluations
+ * @property {number} p50
+ * @property {number} p90
+ * @property {number} p95
  */
 
 /**
@@ -186,3 +62,364 @@ for (const entry of totalBad.sort((a, b) => b.rankDiff - a.rankDiff).slice(0, 10
  * @property {number} roughEstimateOfTTFCPUI
  * @property {number} roughEstimateOfTTI
  */
+
+const fs = require('fs');
+const path = require('path');
+const constants = require('./constants');
+const chalk = require('chalk').default;
+
+const GOOD_DIFF_AS_PERCENT_THRESHOLD = 0.2;
+const OK_DIFF_AS_PERCENT_THRESHOLD = 0.5;
+
+const INPUT_PATH = process.argv[2] || constants.SITE_INDEX_WITH_GOLDEN_WITH_COMPUTED_PATH;
+const COMPUTATIONS_PATH = path.resolve(process.cwd(), INPUT_PATH);
+const BASELINE_PATH = constants.MASTER_COMPUTED_PATH;
+
+if (!fs.existsSync(COMPUTATIONS_PATH)) throw new Error('Usage $0 ');
+
+/** @type {{sites: LanternSiteDefinition[]}} */
+const siteIndexWithComputed = require(COMPUTATIONS_PATH);
+const baselineLanternData = require(BASELINE_PATH);
+
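+// Attach the baseline (master-branch) lantern results to each freshly computed site, joined by URL.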
+for (const site of baselineLanternData.sites) {
+  const computedSite = siteIndexWithComputed.sites.find(entry => entry.url === site.url);
+  if (!computedSite) continue;
+  computedSite.baseline = site;
+}
+
+const entries = siteIndexWithComputed.sites.filter(site => site.lantern && site.baseline);
+
+if (!entries.length) {
+  throw new Error('No lantern metrics available, did you run run-on-all-assets.js?');
+}
+
+/**
+ * @param {LanternSiteDefinition} site
+ * @param {TargetMetrics} expectedMetrics
+ * @param {LanternMetrics} actualMetrics
+ * @param {keyof TargetMetrics} metric
+ * @param {keyof LanternMetrics} lanternMetric
+ * @return {(LanternEvaluation & LanternSiteDefinition)|null}
+ */
+function evaluateSite(site, expectedMetrics, actualMetrics, metric, lanternMetric) {
+  const expected = Math.round(expectedMetrics[metric]);
+  if (expected === 0) return null;
+
+  const actual = Math.round(actualMetrics[lanternMetric]);
+  const diff = Math.abs(actual - expected);
+  const diffAsPercent = diff / expected;
+
+  return {...site, expected, actual, diff, diffAsPercent, metric, lanternMetric};
+}
+
+/** @param {LanternEvaluation} evaluation */
+function isEvaluationGood(evaluation) {
+  return evaluation.diffAsPercent < GOOD_DIFF_AS_PERCENT_THRESHOLD;
+}
+/** @param {LanternEvaluation} evaluation */
+function isEvaluationOK(evaluation) {
+  return (
+    evaluation.diffAsPercent >= GOOD_DIFF_AS_PERCENT_THRESHOLD &&
+    evaluation.diffAsPercent < OK_DIFF_AS_PERCENT_THRESHOLD
+  );
+}
+/** @param {LanternEvaluation} evaluation */
+function isEvaluationBad(evaluation) {
+  // >= (not >) so an error of exactly OK_DIFF_AS_PERCENT_THRESHOLD still falls into a bucket
+  return evaluation.diffAsPercent >= OK_DIFF_AS_PERCENT_THRESHOLD;
+}
+
+/**
+ * @param {LanternSiteDefinition[]} entries
+ * @param {keyof TargetMetrics} metric
+ * @param {keyof LanternMetrics} lanternMetric
+ * @param {'lantern'|'baseline'} [lanternOrBaseline]
+ * @return {EstimateEvaluationSummary}
+ */
+function evaluateAccuracy(entries, metric, lanternMetric, lanternOrBaseline = 'lantern') {
+  const evaluations = [];
+
+  const percentErrors = [];
+  for (const entry of entries) {
+    const evaluation = evaluateSite(
+      entry,
+      entry.wpt3g,
+      entry[lanternOrBaseline],
+      metric,
+      lanternMetric
+    );
+    if (!evaluation) continue;
+
+    evaluations.push(evaluation);
+    percentErrors.push(evaluation.diffAsPercent);
+  }
+
+  percentErrors.sort((a, b) => a - b);
+
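+  // With errors sorted ascending, the pXX error is simply the value at index floor(n / 100 * XX).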
+  const p50 = percentErrors[Math.floor((percentErrors.length / 100) * 50)];
+  const p90 = percentErrors[Math.floor((percentErrors.length / 100) * 90)];
+  const p95 = percentErrors[Math.floor((percentErrors.length / 100) * 95)];
+  return {evaluations, p50, p90, p95};
+}
+
+/** @type {LanternEvaluation[]} */
+const allEvaluations = [];
+/** @type {LanternEvaluation[]} */
+const baselineEvaluations = [];
+
+/**
+ * @param {number} actualValue
+ * @param {number} baselineValue
+ * @param {{isIncreaseGood?: boolean, format?: any, alwaysGray?: boolean}} [options]
+ */
+function toBaselineDiffString(actualValue, baselineValue, options) {
+  options = {isIncreaseGood: false, format: toPercentString, ...options};
+  const diffAsNumber = actualValue - baselineValue;
+  const isGoingUp = diffAsNumber > 0;
+  const isGood = options.isIncreaseGood === isGoingUp;
+
+  let arrow = isGoingUp ? '↑' : '↓';
+  let color = isGood ? chalk.green : chalk.red;
+  if (options.alwaysGray) color = chalk.gray;
+
+  if (Math.abs(diffAsNumber) < baselineValue * 0.01) {
+    arrow = '↔';
+    color = chalk.gray;
+  } else if (Math.abs(diffAsNumber) > baselineValue * 0.1) {
+    arrow = arrow + arrow;
+  }
+
+  const diffAsString = options.format(Math.abs(diffAsNumber));
+  const text = `${arrow.padEnd(2)} ${diffAsString}`;
+  return color(text);
+}
+
+/** @param {number} percentAsDecimal */
+function toPercentString(percentAsDecimal) {
+  return (percentAsDecimal * 100).toFixed(1) + '%';
+}
+
+/**
+ * @param {keyof TargetMetrics} metric
+ * @param {keyof LanternMetrics} lanternMetric
+ */
+function evaluateAndPrintAccuracy(metric, lanternMetric) {
+  const actualAccuracy = evaluateAccuracy(entries, metric, lanternMetric);
+  const baselineAccuracy = evaluateAccuracy(entries, metric, lanternMetric, 'baseline');
+  const baselineOptions = {alwaysGray: !lanternMetric.includes('roughEstimate')};
+
+  const strings = [
+    lanternMetric.padEnd(25),
+    `${toPercentString(actualAccuracy.p50)} ${toBaselineDiffString(
+      actualAccuracy.p50,
+      baselineAccuracy.p50,
+      baselineOptions
+    )}`.padEnd(30),
+    `${toPercentString(actualAccuracy.p90)} ${toBaselineDiffString(
+      actualAccuracy.p90,
+      baselineAccuracy.p90,
+      baselineOptions
+    )}`.padEnd(30),
+    `${toPercentString(actualAccuracy.p95)} ${toBaselineDiffString(
+      actualAccuracy.p95,
+      baselineAccuracy.p95,
+      baselineOptions
+    )}`.padEnd(30),
+  ];
+
+  allEvaluations.push(...actualAccuracy.evaluations);
+  baselineEvaluations.push(...baselineAccuracy.evaluations);
+
+  if (lanternMetric.includes('roughEstimate')) {
+    console.log(...strings);
+  } else {
+    console.log(chalk.gray(...strings));
+  }
+}
+/**
+ * @param {keyof TargetMetrics} metric
+ * @param {string[]} lanternMetrics
+ */
+function findAndPrintWorst10Sites(metric, lanternMetrics) {
+  if (!process.env.PRINT_WORST) return;
+
+  /** @type {Map<string, LanternEvaluation[]>} */
+  const groupedByURL = new Map();
+  for (const site of allEvaluations) {
+    const group = groupedByURL.get(site.url) || [];
+    group.push(site);
+    groupedByURL.set(site.url, group);
+  }
+
+  /** @type {LanternEvaluation[]} */
+  const worstEntries = [];
+  for (const entries of groupedByURL.values()) {
+    const matchingEntries = entries.filter(entry => lanternMetrics.includes(entry.lanternMetric));
+    const minDiffAsPercent = Math.min(...matchingEntries.map(entry => entry.diffAsPercent));
+    const minEntry = matchingEntries.find(entry => minDiffAsPercent === entry.diffAsPercent);
+    if (!minEntry) continue;
+    worstEntries.push(minEntry);
+  }
+
+  console.log(chalk.bold(`\n ------- Worst 10 ${metric} -------`));
+  worstEntries
+    .sort((a, b) => b.diffAsPercent - a.diffAsPercent)
+    .slice(0, 10)
+    .forEach(entry => {
+      console.log(
+        entry.actual < entry.expected
+          ? chalk.cyan('underestimated')
+          : chalk.yellow('overestimated'),
+        entry.metric,
+        chalk.gray('by'),
+        Math.round(entry.diff),
+        chalk.gray('on'),
+        chalk.magenta(entry.url)
+      );
+    });
+}
+
+function findAndPrintFixesRegressions() {
+  /** @type {Map<string, LanternEvaluation>} */
+  const indexedByMetricURL = new Map();
+  baselineEstimates.forEach(e => indexedByMetricURL.set(`${e.lanternMetric}${e.url}`, e));
+
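+  // regression > 0 means the new estimate is further from the WPT value than the baseline was;
+  // regression < 0 means it moved closer (a fix).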
+  const joinedWithBaseline = estimates.map(actual => {
+    const baseline = indexedByMetricURL.get(`${actual.lanternMetric}${actual.url}`);
+    if (!baseline) return {...actual, regression: 0, regressionAsPercent: 0};
+    const regression = actual.diff - baseline.diff;
+    const regressionAsPercent = actual.diffAsPercent - baseline.diffAsPercent;
+    return {...actual, baseline, regression, regressionAsPercent};
+  });
+
+  /** @param {LanternEvaluation} entry */
+  const printEvaluation = entry => {
+    console.log(
+      entry.lanternMetric.replace('roughEstimateOf', ''),
+      chalk.gray('on'),
+      chalk.magenta(entry.url),
+      '-',
+      entry.expected,
+      chalk.gray('(real)'),
+      entry.actual,
+      chalk.gray('(cur)'),
+      // @ts-ignore - baseline always exists at this point
+      entry.baseline.actual,
+      chalk.gray('(prev)')
+    );
+  };
+
+  console.log(chalk.bold('\n ------- Fixes Summary -------'));
+  joinedWithBaseline
+    .filter(e => e.regression < 0)
+    .sort((a, b) => a.regressionAsPercent - b.regressionAsPercent)
+    .slice(0, 4)
+    .forEach(printEvaluation);
+  console.log(chalk.bold('\n ------- Regression Summary -------'));
+  joinedWithBaseline
+    .filter(e => e.regression > 0)
+    .sort((a, b) => b.regressionAsPercent - a.regressionAsPercent)
+    .slice(0, 4)
+    .forEach(printEvaluation);
+}
+
+console.log(
+  chalk.bold(
+    'Metric'.padEnd(25),
+    'p50 (% Error)'.padEnd(20),
+    'p90 (% Error)'.padEnd(20),
+    'p95 (% Error)'.padEnd(20)
+  )
+);
+
+evaluateAndPrintAccuracy('firstContentfulPaint', 'optimisticFCP');
+evaluateAndPrintAccuracy('firstContentfulPaint', 'pessimisticFCP');
+evaluateAndPrintAccuracy('firstContentfulPaint', 'roughEstimateOfFCP');
+
+evaluateAndPrintAccuracy('firstMeaningfulPaint', 'optimisticFMP');
+evaluateAndPrintAccuracy('firstMeaningfulPaint', 'pessimisticFMP');
+evaluateAndPrintAccuracy('firstMeaningfulPaint', 'roughEstimateOfFMP');
+
+evaluateAndPrintAccuracy('timeToFirstInteractive', 'optimisticTTFCPUI');
+evaluateAndPrintAccuracy('timeToFirstInteractive', 'pessimisticTTFCPUI');
+evaluateAndPrintAccuracy('timeToFirstInteractive', 'roughEstimateOfTTFCPUI');
+
+evaluateAndPrintAccuracy('timeToConsistentlyInteractive', 'optimisticTTI');
+evaluateAndPrintAccuracy('timeToConsistentlyInteractive', 'pessimisticTTI');
+evaluateAndPrintAccuracy('timeToConsistentlyInteractive', 'roughEstimateOfTTI');
+
+evaluateAndPrintAccuracy('speedIndex', 'optimisticSI');
+evaluateAndPrintAccuracy('speedIndex', 'pessimisticSI');
+evaluateAndPrintAccuracy('speedIndex', 'roughEstimateOfSI');
+
+const estimates = allEvaluations.filter(entry => entry.lanternMetric.includes('roughEstimate'));
+const baselineEstimates = baselineEvaluations.filter(entry =>
+  entry.lanternMetric.includes('roughEstimate')
+);
+
+findAndPrintWorst10Sites('firstContentfulPaint', [
+  'optimisticFCP',
+  'pessimisticFCP',
+  'roughEstimateOfFCP',
+]);
+findAndPrintWorst10Sites('firstMeaningfulPaint', [
+  'optimisticFMP',
+  'pessimisticFMP',
+  'roughEstimateOfFMP',
+]);
+findAndPrintWorst10Sites('timeToFirstInteractive', [
+  'optimisticTTFCPUI',
+  'pessimisticTTFCPUI',
+  'roughEstimateOfTTFCPUI',
+]);
+findAndPrintWorst10Sites('timeToConsistentlyInteractive', [
+  'optimisticTTI',
+  'pessimisticTTI',
+  'roughEstimateOfTTI',
+]);
+findAndPrintWorst10Sites('speedIndex', ['optimisticSI', 'pessimisticSI', 'roughEstimateOfSI']);
+
+findAndPrintFixesRegressions();
+
+/**
+ * @param {string} label
+ * @param {(e: LanternEvaluation) => boolean} bucketFilterFn
+ * @param {any} opts
+ */
+const printBucket = (label, bucketFilterFn, opts) => {
+  const numInBucket = estimates.filter(bucketFilterFn).length;
+  const baselineInBucket = baselineEstimates.filter(bucketFilterFn).length;
+
+  const actual = numInBucket;
+  const baseline = baselineInBucket;
+  console.log(
+    `${label}:`.padEnd(10),
+    actual.toString().padEnd(5),
+    // @ts-ignore - overly aggressive no implicit any
+    toBaselineDiffString(actual, baseline, {...opts, format: x => x.toString()})
+  );
+};
+
+console.log(chalk.bold('\n ------- Bucket Summary -------'));
+printBucket('Good', isEvaluationGood, {isIncreaseGood: true});
+printBucket('OK', isEvaluationOK, {alwaysGray: true});
+printBucket('Bad', isEvaluationBad, {isIncreaseGood: false});
+
+const percentErrors = estimates.map(x => x.diffAsPercent).sort((a, b) => a - b);
+const baselinePercentErrors = baselineEstimates.map(x => x.diffAsPercent).sort((a, b) => a - b);
+
+/** @param {number} percentile */
+const printPercentile = percentile => {
+  const index = Math.floor((percentErrors.length / 100) * percentile);
+  const actual = percentErrors[index];
+  const baseline = baselinePercentErrors[index];
+  console.log(
+    `p${percentile}:`.padEnd(10),
+    toPercentString(actual),
+    toBaselineDiffString(actual, baseline)
+  );
+};
+
+console.log(chalk.bold('\n ------- % Error Summary -------'));
+printPercentile(50);
+printPercentile(90);
+printPercentile(95);