Skip to content

Commit

Permalink
Initial build of app
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr0grog committed Jun 15, 2018
0 parents commit f6fabe7
Show file tree
Hide file tree
Showing 12 changed files with 6,925 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[*]
indent_size = 2
indent_style = space
insert_final_newline = true

[*.md]
indent_size = 4
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node_modules
12 changes: 12 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Copyright (c) 2018, Rob Brackett
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Google Docs to Markdown

This is a very minimal webapp for converting a Google Doc to Markdown. It uses [Remark](https://github.com/remarkjs/remark) and [Rehype](https://github.com/rehypejs/rehype) (both part of [Unified](https://unifiedjs.github.io)) to do the conversion.


## Install & Build

First make sure you have Node.js installed. Then:

1. In the directory where you have cloned this repository, run `npm install`:

```sh
> cd /path/to/cloned/repo
> npm install
```

2. For a one-time build, run:

```sh
> npm run build
```

…and the built output will be in the `dist` folder.

To start a server with live rebuilding, run:

```sh
> npm start
```

Then point your browser to `http://localhost:9000` to see the site. It will automatically rebuild whenever you change any files.


## License

GDoc2MD is open source software. It is (c) 2018 Rob Brackett and licensed under the 3-Clause BSD license. The full license text is in the LICENSE file.
118 changes: 118 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Convert Google Doc to Markdown</title>
<!-- Include Source Sans because it's the default for Google Docs -->
<link href="https://fonts.googleapis.com/css?family=Source+Sans+Pro" rel="stylesheet">
<style type="text/css">
* {
box-sizing: border-box;
}

html, body {
background: #eee;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
height: 100%;
margin: 0;
}

body {
display: flex;
flex-direction: column;
}

main {
display: flex;
flex: 1 1 auto;
flex-direction: row;
justify-content: space-between;
width: 100%;
padding: 1em 2em;
}

#app-header {
flex: 0 0 auto;
margin: 0;
padding: 1em 2em;
}

#app-header h1 {
margin: 0;
padding: 0;
}

.input-field {
border: 1px solid #ccc;
font-size: 1em;
overflow: auto;
padding: 1em;
}

#input-area {
position: relative;
width: calc(50% - 1em);
}

.instructions {
font-size: 2em;
font-weight: bold;
opacity: 0.5;
padding: 0 1em;
position: absolute;
top: 1em;
left: 0;
right: 0;
text-align: center;
}

#input {
position: absolute;
left: 0;
top: 0;
right: 0;
bottom: 0;
z-index: 1;
}

#output-area {
position: relative;
width: calc(50% - 1em);
}

#output {
background: transparent;
position: absolute;
left: 0;
top: 0;
width: 100%;
height: 100%;
z-index: 1;
white-space: pre;
}
</style>
</head>
<body>
<header id="app-header">
<h1>Convert Google Docs to Markdown</h1>
</header>

<main>
  <div id="input-area">
    <p class="instructions">Paste Google Docs text here…</p>
    <!--
      A contenteditable <div> has no implicit role or accessible name, and the
      instructions paragraph is hidden by the script once there is content, so
      give assistive technology an explicit role and label.
    -->
    <div id="input" class="input-field" contenteditable role="textbox" aria-multiline="true" aria-label="Google Docs content to convert"></div>
  </div>

  <div id="output-area">
    <!-- TODO: copy button -->
    <p class="instructions">…and get your Markdown here</p>
    <!-- No <label> is associated with this field, so name it explicitly. -->
    <textarea id="output" class="input-field" aria-label="Markdown output"></textarea>
  </div>
</main>

<footer>
<!-- TODO: Github link -->
</footer>
<script src="bundle.js"></script>
</body>
</html>
43 changes: 43 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import fixGoogleHtml from './lib/fix-google-html';
// rehype-dom-parse is a lightweight version of rehype-parse that leverages
// browser APIs -- reduces bundle size by ~200 kB!
// const parse = require('rehype-dom-parse').default;
import parse from 'rehype-dom-parse';
import rehype2remarkWithSpaces from './lib/rehype-to-remark-with-spaces';
import stringify from 'remark-stringify';
import unified from 'unified';


// The conversion pipeline, in the order the plugins are applied: parse the
// HTML, clean up Google Docs quirks, convert the tree with the space-preserving
// rehype-remark wrapper, then stringify.
const pipelinePlugins = [
  parse,
  fixGoogleHtml,
  // For debugging, the plugin in './lib/log-tree' can be inserted here to log
  // the tree at this point in the pipeline.
  rehype2remarkWithSpaces,
  stringify
];

const processor = pipelinePlugins.reduce(
  (pipeline, plugin) => pipeline.use(plugin),
  unified()
);

/**
 * Convert a string of HTML (e.g. the markup pasted from a Google Doc) to
 * Markdown by running it through `processor`.
 *
 * @param {string} [html] The HTML to convert. When omitted, the current
 *   `innerHTML` of the input element is used, which keeps calls made without
 *   an argument working as they did before the argument was honored.
 * @returns {Promise<string>} A promise for the stringified processing result.
 */
function convertToMarkdown (html = inputElement.innerHTML) {
  // Use the argument instead of always re-reading the DOM so the function can
  // convert any HTML string (it is also exposed as `window.convertToMarkdown`).
  return processor.process(html).then(String);
}


const inputElement = document.getElementById('input');
const outputElement = document.getElementById('output');
const inputInstructions = document.querySelector('#input-area .instructions');
const outputInstructions = document.querySelector('#output-area .instructions');

// Show or hide an element. An empty `display` value removes the inline
// override so the stylesheet's own value applies again.
function setVisible (element, visible) {
  element.style.display = visible ? '' : 'none';
}

// Re-run the conversion whenever the input changes, and only show each
// pane's instructions while that pane is empty.
function handleInput () {
  setVisible(inputInstructions, !inputElement.textContent);

  convertToMarkdown(inputElement.innerHTML)
    .then(markdown => {
      outputElement.value = markdown;
      setVisible(outputInstructions, !markdown.trim());
    })
    .catch(error => {
      console.error(error);
      setVisible(outputInstructions, true);
    });
}

inputElement.addEventListener('input', handleInput);

window.convertToMarkdown = convertToMarkdown;
88 changes: 88 additions & 0 deletions lib/fix-google-html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
'use strict';

import hast from 'hastscript';
import visit from 'unist-util-visit';

// Is `node` a `ul` or `ol` element? Safe to call with `null`/`undefined`, which
// is what a tree visitor receives as the parent of the root node.
const isList = node =>
  Boolean(node) && (node.tagName === 'ul' || node.tagName === 'ol');

// Is `node` an element with a non-empty inline `style`? Elements that have no
// `properties` object at all are treated as unstyled instead of throwing.
const isStyled = node =>
  Boolean(node) &&
  node.type === 'element' &&
  Boolean(node.properties && node.properties.style);

/**
 * Insert `wrapper` between `node` and its children: `wrapper` takes over the
 * node's existing children array and becomes the node's only child.
 *
 * @param {RehypeNode} node The node whose children should be wrapped
 * @param {RehypeNode} wrapper The node to wrap them in
 * @returns {RehypeNode} The `wrapper` node
 */
function wrapChildren (node, wrapper) {
  const { children: wrappedChildren } = node;
  wrapper.children = wrappedChildren;
  node.children = [wrapper];
  return wrapper;
}

// Find the `li` that a free-floating list at `index` belongs to: the closest
// preceding sibling, ignoring whitespace-only text and other non-element,
// non-text nodes. Returns `null` if the closest meaningful sibling is not an
// `li`.
function findPreviousListItem (siblings, index) {
  for (let i = index - 1; i >= 0; i--) {
    const sibling = siblings[i];
    if (sibling.type === 'element') {
      return sibling.tagName === 'li' ? sibling : null;
    }
    if (sibling.type === 'text' && sibling.value.trim() !== '') {
      return null;
    }
  }
  return null;
}

/**
 * Fix the incorrect formatting of nested lists in Google Docs's HTML. Lists
 * can only have `div` and `li` children, but Google Docs has other lists as
 * direct descendants. This moves those free-floating lists into the previous
 * `li` element under the assumption that they represent subitems of it.
 *
 * Whitespace-only text between the `li` and the free-floating list (as in the
 * indented example below) does not prevent the move. If there is no previous
 * `li` to move a list into, a warning is logged and the list is left in place.
 *
 * @param {RehypeNode} node Fix the tree below this node
 *
 * @example
 * Input a tree like:
 *   <ul>
 *     <li>An item!</li>
 *     <ul>
 *       <li>A subitem!</li>
 *     </ul>
 *   </ul>
 *
 * Output:
 *   <ul>
 *     <li>An Item!
 *       <ul>
 *         <li>A subitem!</li>
 *       </ul>
 *     </li>
 *   </ul>
 */
export function fixNestedLists (node) {
  visit(node, isList, (list, index, parent) => {
    // The root of the visited tree has no parent; a root list is never nested.
    if (!parent || !isList(parent)) {
      return;
    }

    const previous = findPreviousListItem(parent.children, index);
    if (!previous) {
      console.warn('No previous list item to move nested list into!');
      return;
    }

    previous.children.push(list);
    parent.children.splice(index, 1);
    // The next sibling has shifted into this slot, so continue visiting from
    // the same index rather than skipping over it.
    return index;
  });
}

/**
 * Google Docs does italics/bolds/etc. on <span>s with style attributes, but
 * rehype-remark does not pick up on those well. Instead, wrap the contents of
 * such elements in real `em`, `strong`, etc. elements.
 *
 * @param {RehypeNode} node Fix the tree below this node
 */
export function unInlineStyles (node) {
  // Inline style patterns and the element that expresses each one. They are
  // applied in order, so text that is both italic and bold ends up as
  // `strong > em`.
  const styleWrappers = [
    [/font-style:\s*italic/, 'em'],
    [/font-weight:\s*(bold|700)/, 'strong']
  ];

  visit(node, isStyled, styledNode => {
    const css = styledNode.properties.style;
    styleWrappers.forEach(([pattern, tagName]) => {
      if (pattern.test(css)) {
        wrapChildren(styledNode, hast(tagName));
      }
    });
  });
}

/**
 * A rehype plugin to clean up the HTML of a Google Doc. This applies to the
 * live HTML of a Doc, as when you copy and paste it; not *exported* HTML (it
 * might apply there, too; I haven’t looked into it).
 *
 * @returns {Function} A transformer that fixes up the tree in place (inline
 *   styles first, then nested lists) and returns that same tree.
 */
export default function fixGoogleHtml () {
  function transformer (tree, file) {
    const fixes = [unInlineStyles, fixNestedLists];
    fixes.forEach(fix => fix(tree));
    return tree;
  }

  return transformer;
}
22 changes: 22 additions & 0 deletions lib/log-tree.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
 * A rehype plugin that logs the current (HTML) tree to the console as an
 * indented outline, one line per node.
 *
 * @returns {Function} A transformer that logs the tree and returns nothing.
 */
export default function logTree () {
  // Text nodes show their value, elements show their tag name, and anything
  // else just shows its node type.
  function describe (node) {
    switch (node.type) {
      case 'text':
        return `(${node.type}): ${node.value}`;
      case 'element':
        return `<${node.tagName}>`;
      default:
        return `(${node.type})`;
    }
  }

  function logNode (node, indent = 0) {
    const padding = ' '.repeat(indent);
    console.log(`${padding}- ${describe(node)}`);

    if (node.children) {
      // Children are logged two spaces deeper than their parent.
      for (const child of node.children) {
        logNode(child, indent + 2);
      }
    }
  }

  return tree => logNode(tree);
}
38 changes: 38 additions & 0 deletions lib/rehype-to-remark-with-spaces.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
const rehype2remark = require('rehype-remark');

/**
 * The official rehype-remark plugin gets a little aggressive with removing
 * spaces, so this wraps it with some space preservation: a leading space in
 * each text node is swapped for a placeholder token before the conversion and
 * the token is turned back into a space afterward.
 *
 * Ideally, this needs to be solved upstream in rehype-remark.
 * TODO: create a minimal test case and file a bug there!
 *
 * @param {...*} options Arguments passed through unchanged to rehype-remark
 * @returns {Function} A transformer taking `(tree, file)` and returning the
 *   tree produced by rehype-remark, with the preserved spaces restored.
 */
export default function rehype2remarkWithSpaces (...options) {
  const spaceToken = '++IAMASPACE++';

  // Replace a single leading space in every text node with the token.
  function preserveInitialSpaces (node) {
    if (node.type === 'text' && node.value.startsWith(' ')) {
      node.value = spaceToken + node.value.slice(1);
    }
    if (node.children) {
      node.children.forEach(preserveInitialSpaces);
    }
  }

  // Turn every token back into a space. This checks any node carrying a
  // string `value`, not just `text` nodes: the conversion can fold the
  // content of a text node into the value of a different kind of node (e.g.
  // inline code), and the token must not leak into the output from there.
  function recreateSpaces (node) {
    if (typeof node.value === 'string') {
      node.value = node.value.split(spaceToken).join(' ');
    }
    if (node.children) {
      node.children.forEach(recreateSpaces);
    }
  }

  // `this` is forwarded in both calls so rehype-remark sees the same context
  // it would get if it were used directly.
  const convert = rehype2remark.apply(this, options);
  return function (tree, file) {
    preserveInitialSpaces(tree);
    const markdownTree = convert.apply(this, [tree, file]);
    recreateSpaces(markdownTree);
    return markdownTree;
  };
}
Loading

0 comments on commit f6fabe7

Please sign in to comment.