From 60977253f9aa68e334b5aae07f11d9ed607c16da Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 8 Dec 2021 18:15:51 +0000 Subject: [PATCH 1/2] Fix html exporting and improve output size --- src/utils/exportUtils/HtmlExport.tsx | 11 ++++- src/utils/exportUtils/exportCSS.ts | 67 ++++++++++++++++++---------- tsconfig.json | 2 +- 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/src/utils/exportUtils/HtmlExport.tsx b/src/utils/exportUtils/HtmlExport.tsx index 7c8265fd32c..9317914fa7b 100644 --- a/src/utils/exportUtils/HtmlExport.tsx +++ b/src/utils/exportUtils/HtmlExport.tsx @@ -40,6 +40,9 @@ import { textForEvent } from "../../TextForEvent"; import { logger } from "matrix-js-sdk/src/logger"; +const htmlClassRegex = /class="\s*(.+?)\s*"/gis; +const whiteSpaceRegex = /\s+/g; + export default class HTMLExporter extends Exporter { protected avatars: Map; protected permalinkCreator: RoomPermalinkCreator; @@ -419,11 +422,17 @@ export default class HTMLExporter extends Exporter { this.updateProgress(`Fetched ${res.length} events in ${(fetchEnd - fetchStart)/1000}s`, true, false); this.updateProgress("Creating HTML..."); + + const usedClasses = new Set(); for (let page = 0; page < res.length / 1000; page++) { const html = await this.createHTML(res, page * 1000); + for (const [, classes] of html.matchAll(htmlClassRegex)) { + classes.split(whiteSpaceRegex).forEach(c => usedClasses.add(c)); + } this.addFile(`messages${page ? page + 1 : ""}.html`, new Blob([html])); } - const exportCSS = await getExportCSS(); + + const exportCSS = await getExportCSS(usedClasses); this.addFile("css/style.css", new Blob([exportCSS])); this.addFile("js/script.js", new Blob([exportJS])); diff --git a/src/utils/exportUtils/exportCSS.ts b/src/utils/exportUtils/exportCSS.ts index f7f471fda3e..671d3326182 100644 --- a/src/utils/exportUtils/exportCSS.ts +++ b/src/utils/exportUtils/exportCSS.ts @@ -18,33 +18,52 @@ limitations under the License. 
import customCSS from "!!raw-loader!./exportCustomCSS.css"; -const getExportCSS = async (): Promise => { - const stylesheets: string[] = []; - document.querySelectorAll('link[rel="stylesheet"]').forEach((e: any) => { - if (e.href.endsWith("bundle.css") || e.href.endsWith("theme-light.css")) { - stylesheets.push(e.href); - } +const cssSelectorTextClassesRegex = /\.[\w-]+/g; + +function mutateCssText(css: string): string { + // replace used fonts so that we don't have to bundle Inter & Inconsolata + return css + .replace( + /font-family: ?(Inter|'Inter'|"Inter")/g, + `font-family: -apple-system, BlinkMacSystemFont, avenir next, + avenir, segoe ui, helvetica neue, helvetica, Ubuntu, roboto, noto, arial, sans-serif`, + ) + .replace( + /font-family: ?Inconsolata/g, + "font-family: Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace", + ); +} + +// naively culls unused css rules based on which classes are present in the html, +// doesn't cull rules which won't apply due to the full selector not matching but gets rid of a LOT of cruft anyway. 
+const getExportCSS = async (usedClasses: Set): Promise => { + // only include bundle.css and the data-mx-theme=light styling + const stylesheets = Array.from(document.styleSheets).filter(s => { + return s.href?.endsWith("bundle.css") || (s.ownerNode as HTMLStyleElement).dataset.mxTheme === "light"; }); - let CSS = ""; + + let css = ""; for (const stylesheet of stylesheets) { - const res = await fetch(stylesheet); - const innerText = await res.text(); - CSS += innerText; + for (const rule of stylesheet.cssRules) { + if (rule instanceof CSSFontFaceRule) continue; // we don't want to bundle any fonts + + const selectorText = (rule as CSSStyleRule).selectorText; + + // only skip the rule if all branches (,) of the selector are redundant + if (selectorText?.split(",").every(selector => { + const classes = selector.match(cssSelectorTextClassesRegex); + if (classes && !classes.every(c => usedClasses.has(c.substring(1)))) { + return true; // signal as a redundant selector + } + })) { + continue; // skip this rule as it is redundant + } + + css += mutateCssText(rule.cssText) + "\n"; + } } - const fontFaceRegex = /@font-face {.*?}/sg; - - CSS = CSS.replace(fontFaceRegex, ''); - CSS = CSS.replace( - /font-family: (Inter|'Inter')/g, - `font-family: -apple-system, BlinkMacSystemFont, avenir next, - avenir, segoe ui, helvetica neue, helvetica, Ubuntu, roboto, noto, arial, sans-serif`, - ); - CSS = CSS.replace( - /font-family: Inconsolata/g, - "font-family: Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace", - ); - - return CSS + customCSS; + + return css + customCSS; }; export default getExportCSS; diff --git a/tsconfig.json b/tsconfig.json index 02904af9d10..f74079696eb 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -13,7 +13,7 @@ "declaration": true, "jsx": "react", "lib": [ - "es2019", + "es2020", "dom", "dom.iterable" ], From 1d6cf89065ff36b9dd9a7037aad91a858ca4feed Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 8 Dec 
2021 18:30:39 +0000 Subject: [PATCH 2/2] Switch to DOMParser instead of jank regex in case I missed some edge that it didn't handle, also, sanity --- src/utils/exportUtils/HtmlExport.tsx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/utils/exportUtils/HtmlExport.tsx b/src/utils/exportUtils/HtmlExport.tsx index 9317914fa7b..4b70ce7e62a 100644 --- a/src/utils/exportUtils/HtmlExport.tsx +++ b/src/utils/exportUtils/HtmlExport.tsx @@ -40,9 +40,6 @@ import { textForEvent } from "../../TextForEvent"; import { logger } from "matrix-js-sdk/src/logger"; -const htmlClassRegex = /class="\s*(.+?)\s*"/gis; -const whiteSpaceRegex = /\s+/g; - export default class HTMLExporter extends Exporter { protected avatars: Map; protected permalinkCreator: RoomPermalinkCreator; @@ -426,9 +423,10 @@ export default class HTMLExporter extends Exporter { const usedClasses = new Set(); for (let page = 0; page < res.length / 1000; page++) { const html = await this.createHTML(res, page * 1000); - for (const [, classes] of html.matchAll(htmlClassRegex)) { - classes.split(whiteSpaceRegex).forEach(c => usedClasses.add(c)); - } + const document = new DOMParser().parseFromString(html, "text/html"); + document.querySelectorAll("*").forEach(element => { + element.classList.forEach(c => usedClasses.add(c)); + }); this.addFile(`messages${page ? page + 1 : ""}.html`, new Blob([html])); }