Skip to content

Commit

Permalink
Split up Slice Clip handling & HTML conversion (#219)
Browse files Browse the repository at this point in the history
We previously let the browser handle pasting into our input area, and then ran a single conversion process on that HTML to make markdown. Since adding support for extra metadata that is only present in the "slice clip" format and not in HTML, this process has gotten more fragile: the slice clip tightly binds formatting info to specific indexes in the text, but our input area allows people to tweak the content after pasting, which invalidates the slice clip metadata's indexes.

This switches the architecture to convert in two steps:
1. On paste, merge the slice clip data we care about into the HTML from the clipboard by adding or changing DOM nodes to mark the relevant info from the slice clip, and drop the result into the input area.
2. On change of the input text, convert the HTML in the input to Markdown.

This allows people to tweak the input, because we converted the special slice clip data into a DOM-centric representation (e.g. bookmarks are now `<a>` elements, not arbitrary positions in the text) in step 1, which can be freely manipulated via `contenteditable`. It also makes pastes into the middle of an existing document in the input area robust to the same sort of issues.
  • Loading branch information
Mr0grog authored Aug 22, 2024
1 parent 531c3eb commit 08cb4c3
Show file tree
Hide file tree
Showing 22 changed files with 640 additions and 461 deletions.
6 changes: 6 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@
font-weight: normal;
text-decoration: none;
}

/* Make bookmarks visible in the input area. */
#input a[id^="id."]:before,
#input a[name^="id."]:before {
content: "※ "
}
</style>
</head>
<body>
Expand Down
78 changes: 54 additions & 24 deletions lib-ui/index.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import { convertDocsHtmlToMarkdown, defaultOptions } from '../lib/convert.js';
import {
convertDocsHtmlToMarkdown,
defaultOptions,
combineGoogleDocFormats,
} from '../lib/convert.js';
import { settings as currentSettings } from './settings.js';
import debug from 'debug';

const SLICE_CLIP_MEDIA_TYPE =
'application/x-vnd.google-docs-document-slice-clip';

const log = debug('app:index:debug');

const settingsForm = document.getElementById('settings');
const inputElement = document.getElementById('input');
const outputElement = document.getElementById('output');
Expand All @@ -16,7 +17,7 @@ const outputInstructions = document.querySelector('#output-area .instructions');
function convert() {
convertDocsHtmlToMarkdown(
inputElement.innerHTML,
latestSliceClip,
null,
currentSettings.getAll()
)
.then((markdown) => {
Expand All @@ -29,32 +30,61 @@ function convert() {
});
}

// Hold most recently pasted Slice Clip (the Google Docs internal copy/paste
// format) globally so we can re-use it if the user hand-edits the input.
let latestSliceClip = null;
inputElement.addEventListener('paste', (event) => {
/**
 * React to a change in the input area's content.
 *
 * Hides the input instructions whenever the input area contains any text
 * (and restores their default display when it is empty), then re-runs the
 * conversion via `convert()`.
 */
function handleInput() {
  // An empty `textContent` means there is nothing to convert yet, so the
  // instructions stay visible (empty string resets the inline `display`).
  const isEmpty = !inputElement.textContent;
  inputInstructions.style.display = isEmpty ? '' : 'none';

  convert();
}

inputElement.addEventListener('input', handleInput);

// If the clipboard data looks like it came from Google Docs, do some
// pre-processing before inserting it into the input area.
//
// This handles two things:
// 1. Some wrapper structure in the HTML that we want to clean out.
// 2. Pulling relevant data out of the "Slice Clip" format and updating the HTML
// with it (when available). The clipboard HTML format from Google Docs is
// missing a lot of detailed info the slice clip has.
inputElement.addEventListener('paste', async (event) => {
if (!event.clipboardData) {
console.warn('Could not access clipboard data from paste event');
return;
}

// Allow for raw or wrapped slice clips (one uses a "+wrapped" suffix).
const sliceClipType = event.clipboardData.types.find((type) =>
type.startsWith(SLICE_CLIP_MEDIA_TYPE)
);
log('Slice clip media type: %s', sliceClipType);
if (sliceClipType) {
const sliceClip = event.clipboardData.getData(sliceClipType);
log('raw slice clip: %s', sliceClip);
latestSliceClip = sliceClip;
}
});
let sliceClip =
event.clipboardData.getData(SLICE_CLIP_MEDIA_TYPE) ||
event.clipboardData.getData(`${SLICE_CLIP_MEDIA_TYPE}+wrapped`);

inputElement.addEventListener('input', () => {
const hasContent = !!inputElement.textContent;
inputInstructions.style.display = hasContent ? 'none' : '';
let html =
event.clipboardData.getData('text/html') ||
event.clipboardData.getData('public.html');

convert();
// Both paste types may not always be present. Some browsers (mainly Safari)
// do not allow cross-origin access to clipboard formats except a select few,
// and so block access to the slice clip data.
//
// More info:
// - https://webkit.org/blog/10855/async-clipboard-api/
// - https://webkit.org/blog/8170/clipboard-api-improvements/
if ((html && sliceClip) || /id=['"']docs-internal-guid-/.test(html)) {
event.preventDefault();
const fancyHtml = await combineGoogleDocFormats(html, sliceClip);

const selection = window.getSelection();
if (selection.anchorNode && inputElement.contains(selection.anchorNode)) {
// `execCommand` is discouraged these days, but is the only built-in that
// does a nice job normalizing the HTML given the input location.
// (That is, it handles inserting a `<p>` inside a `<p>` or some other
// incompatible situation gracefully.)
document.execCommand('insertHTML', false, fancyHtml);
} else {
inputElement.innerHTML = fancyHtml;
}

handleInput();
}
});

const copyButton = document.getElementById('copy-button');
Expand Down
100 changes: 55 additions & 45 deletions lib/convert.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
// browser APIs -- reduces bundle size by ~200 kB!
import parse from 'rehype-dom-parse';
import { defaultHandlers } from 'hast-util-to-mdast';
import rehypeStringify from 'rehype-stringify';
import remarkGfm from 'remark-gfm';
import stringify from 'remark-stringify';
import { unified } from 'unified';
import GithubSlugger from 'github-slugger';
import fixGoogleHtml from './fix-google-html.js';
import { updateHtmlWithSliceClip, cleanGoogleHtml } from './fix-google-html.js';
import { getHastTextContent } from './hast-tools.js';
// import logTree from './log-tree.js';
import rehype2remarkWithSpaces from './rehype-to-remark-with-spaces.js';
Expand Down Expand Up @@ -58,8 +59,6 @@ function headingWithIdHandler({ headingIds }) {
};
}

const slugger = new GithubSlugger();

/**
* Create a handler for `<a>` elements. The default handler is pretty basic,
* and this adds support for linking to headings by slug (instead of ID) and
Expand All @@ -68,6 +67,8 @@ const slugger = new GithubSlugger();
* @returns {Handle}
*/
function anchorHandler({ headingIds }) {
const slugger = new GithubSlugger();

/** @type {Handle} */
return function anchorToMdast(state, node, _parent) {
const anchorName = node.properties.id || node.properties.name;
Expand Down Expand Up @@ -107,39 +108,36 @@ function doubleBlankLinesBeforeHeadings(previous, next, _parent, _state) {
return undefined;
}

function createProcessor(options) {
function createProcessor(options, converter = cleanGoogleHtml) {
const headingWithId = headingWithIdHandler(options);

return (
unified()
.use(parse)
.use(fixGoogleHtml)
// .use(logTree)
.use(rehype2remarkWithSpaces, {
handlers: {
// Preserve sup/sub markup; most Markdowns have no markup for it.
sub: preserveTagAndConvertContents,
sup: preserveTagAndConvertContents,
ins: preserveTagAndConvertContents,
h1: headingWithId,
h2: headingWithId,
h3: headingWithId,
h4: headingWithId,
h5: headingWithId,
h6: headingWithId,
a: anchorHandler(options),
},
})
.use(remarkGfm)
.use(stringify, {
bullet: '-',
emphasis: '_',
fences: options.codeBlocks === 'fenced',
listItemIndent: 'one',
strong: '*',
join: [doubleBlankLinesBeforeHeadings],
})
);
return unified()
.use(parse)
.use(converter)
.use(rehype2remarkWithSpaces, {
handlers: {
// Preserve sup/sub markup; most Markdowns have no markup for it.
sub: preserveTagAndConvertContents,
sup: preserveTagAndConvertContents,
ins: preserveTagAndConvertContents,
h1: headingWithId,
h2: headingWithId,
h3: headingWithId,
h4: headingWithId,
h5: headingWithId,
h6: headingWithId,
a: anchorHandler(options),
},
})
.use(remarkGfm)
.use(stringify, {
bullet: '-',
emphasis: '_',
fences: options.codeBlocks === 'fenced',
listItemIndent: 'one',
strong: '*',
join: [doubleBlankLinesBeforeHeadings],
});
}

/**
Expand Down Expand Up @@ -172,17 +170,29 @@ function parseGdocsSliceClip(raw) {
return data;
}

export function convertDocsHtmlToMarkdown(html, rawSliceClip, options) {
const sliceClip = rawSliceClip ? parseGdocsSliceClip(rawSliceClip) : null;
export async function convertDocsHtmlToMarkdown(html, rawSliceClip, options) {
options = { ...defaultOptions, ...options };

return createProcessor(options)
.process({
value: html,
data: {
sliceClip,
options,
},
})
.then((result) => result.value);
if (rawSliceClip) {
html = await combineGoogleDocFormats(html, rawSliceClip);
}

const result = await createProcessor(options, cleanGoogleHtml).process({
value: html,
data: { options },
});

return result.value;
}

/**
 * Run clipboard HTML through the `updateHtmlWithSliceClip` plugin and
 * serialize the result back to an HTML string.
 *
 * The raw slice clip is parsed with `parseGdocsSliceClip` when one is
 * provided; otherwise the plugin receives `sliceClip: null` in the file data.
 *
 * @param {string} html HTML to process.
 * @param {string} [rawSliceClip] Unparsed slice clip data. Any falsy value
 *        means "no slice clip available".
 * @returns {Promise<string>} The `value` of the processed file.
 */
export async function combineGoogleDocFormats(html, rawSliceClip) {
  let sliceClip = null;
  if (rawSliceClip) {
    sliceClip = parseGdocsSliceClip(rawSliceClip);
  }

  // parse -> merge slice clip info -> stringify back to HTML.
  const pipeline = unified()
    .use(parse)
    .use(updateHtmlWithSliceClip)
    .use(rehypeStringify);

  const { value } = await pipeline.process({
    value: html,
    data: { sliceClip },
  });

  return value;
}
Loading

0 comments on commit 08cb4c3

Please sign in to comment.