Skip to content

Commit

Permalink
Merge pull request #26 from alphagov/fix-gdocs-lists
Browse files Browse the repository at this point in the history
Fix google docs lists
  • Loading branch information
kevindew authored Apr 4, 2019
2 parents 9bd991b + a2ccf8e commit 5dbb194
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 5 deletions.
55 changes: 53 additions & 2 deletions src/to-govspeak.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import TurndownService from 'turndown'

// Shared Turndown converter used by every rule in this file.
// - bulletListMarker: unordered list items are emitted with a dash.
// - listIndent: custom option read by the list rules below (via
//   `options.listIndent`) to indent nested list content.
const service = new TurndownService({
  bulletListMarker: '-',
  listIndent: '   ' // 3 spaces
})

// As a user may have pasted markdown we rather crudely
Expand Down Expand Up @@ -114,6 +115,56 @@ service.addRule('cleanUpNestedLinks', {
}
})

// Google docs has a habit of producing nested lists that are not nested
// with valid HTML. Rather than embedding sub lists inside an <li> element they
// are nested in the <ul> or <ol> element.
service.addRule('invalidNestedLists', {
  // Matches a <ul>/<ol> whose previous element sibling is an <li>, i.e. a
  // list that sits next to a list item rather than inside one.
  filter: (node) => {
    const tag = node.nodeName.toLowerCase()
    if (tag !== 'ul' && tag !== 'ol') return

    const previous = node.previousElementSibling
    if (!previous) return

    return previous.nodeName.toLowerCase() === 'li'
  },
  // Emits the list's markdown shifted in by one `listIndent` level so it
  // lines up as a child of the preceding item.
  replacement: (content, node, options) => {
    const indent = options.listIndent
    const nested = content
      .replace(/^\n+/, '') // strip leading newlines
      .replace(/\n+$/, '') // strip trailing newlines
      .replace(/\n/gm, `\n${indent}`) // indent every following line

    // The first line has no preceding newline, so indent it explicitly.
    return `${indent}${nested}\n`
  }
})

// This is ported from https://github.com/domchristie/turndown/blob/80297cebeae4b35c8d299b1741b383c74eddc7c1/src/commonmark-rules.js#L61-L80
// It is modified in the following ways:
// - Only determines ol ordering based on li elements
// - Removes handling of ol start attribute as this doesn't affect govspeak output
// - Makes spacing consistent with gov.uk markdown guidance
service.addRule('listItems', {
  filter: 'li',
  // Renders one list item: a dash marker for unordered lists, or a 1-based
  // ordinal counted among the <li> element siblings for ordered lists.
  replacement (content, node, options) {
    const body = content
      .replace(/^\n+/, '') // strip leading newlines
      .replace(/\n+$/, '\n') // collapse trailing newlines to a single one
      .replace(/\n/gm, `\n${options.listIndent}`) // indent continuation lines

    let marker = options.bulletListMarker + ' '
    if (node.parentNode.nodeName.toLowerCase() === 'ol') {
      // Count only preceding <li> siblings so that other elements placed
      // directly inside the <ol> do not consume a number.
      let position = 1
      let sibling = node.previousElementSibling
      while (sibling) {
        if (sibling.nodeName.toLowerCase() === 'li') {
          position += 1
        }
        sibling = sibling.previousElementSibling
      }
      marker = `${position}. `
    }

    // Separate this item from whatever follows unless it already ends in a
    // newline.
    const needsSeparator = node.nextSibling && !/\n$/.test(body)
    return marker + body + (needsSeparator ? '\n' : '')
  }
})

function removeBrParagraphs (govspeak) {
// This finds places where we have a br in a paragraph on its own and
// removes it.
Expand All @@ -129,7 +180,7 @@ function removeBrParagraphs (govspeak) {
function extractHeadingsFromLists (govspeak) {
  // This finds instances of headings within ordered lists and replaces them
  // with the headings only. It targets H2 and H3 markers (two or three `#`).
  //
  // govspeak: markdown string to clean up.
  // Returns the string with each "<number>. " prefix removed where it is
  // immediately followed by a heading marker.
  //
  // `\d+` (rather than a single `\d`) ensures multi-digit ordinals such as
  // "10. ## Heading" are removed in full instead of leaving a stray "1".
  // Note that a longer run of `#` still matches on its first three characters.
  const headingsInListsRegExp = /\d+\.\s(#{2,3})/g
  return govspeak.replace(headingsInListsRegExp, '$1')
}

Expand Down
54 changes: 51 additions & 3 deletions test/to-govspeak.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ it('converts lists to a dash bullet style', () => {
<li>Item 2</li>
</ul>
`
expect(toGovspeak(html)).toEqual('- Item 1\n- Item 2')
expect(toGovspeak(html)).toEqual('- Item 1\n- Item 2')
})

it('maintains H2 and H3 headers', () => {
Expand Down Expand Up @@ -110,7 +110,7 @@ it('extracts headers from lists', () => {
<li><h3>Item 2</h3></li>
</ol>
`
expect(toGovspeak(html)).toEqual('## Item 1\n \n### Item 2')
expect(toGovspeak(html)).toEqual('## Item 1\n \n### Item 2')
})

it('strips paragraph elements within a list item', () => {
Expand All @@ -120,7 +120,7 @@ it('strips paragraph elements within a list item', () => {
<li><p>Item 2</p></li>
</ul>
`
expect(toGovspeak(html)).toEqual('- Item 1\n- Item 2')
expect(toGovspeak(html)).toEqual('- Item 1\n- Item 2')
})

it('removes nested links when link markdown text is wrapped in an element', () => {
Expand All @@ -136,3 +136,51 @@ it('removes nested links when link markdown text is not wrapped in an element',
`
expect(toGovspeak(html)).toEqual('[nested link](https://www.gov.uk/)')
})

// Regression test: nested <ul> elements that are direct children of their
// parent <ul> (siblings of the preceding <li>) rather than wrapped in an <li>.
it('fixes an invalid nested unordered list that Google Docs produces', () => {
const html = `
<ul>
<li>Parent</li>
<ul>
<li>Child</li>
<ul>
<li>Grand child</li>
</ul>
</ul>
<li>Parent sibling</li>
</ul>
`
// The nested items are expected as indented dash bullets, in document order,
// with the trailing top-level item back at the outer level.
expect(toGovspeak(html)).toEqual(
'- Parent\n' +
' - Child\n' +
' - Grand child\n' +
'- Parent sibling'
)
})

// Regression test: nested <ol> elements that are direct children of their
// parent <ol> (siblings of the preceding <li>) rather than wrapped in an <li>.
it('fixes an invalid nested ordered list that Google Docs produces', () => {
const html = `
<ol>
<li>Parent</li>
<ol>
<li>Child 1</li>
<ol>
<li>Grand child 1</li>
<li>Grand child 2</li>
</ol>
<li>Child 2</li>
<li>Child 3</li>
</ol>
<li>Parent sibling</li>
</ol>
`
// Numbering restarts at 1 inside each nested list, and an intervening nested
// <ol> does not consume a number ("Child 2" follows "Child 1", and
// "Parent sibling" is item 2 of the outer list).
expect(toGovspeak(html)).toEqual(
'1. Parent\n' +
' 1. Child 1\n' +
' 1. Grand child 1\n' +
' 2. Grand child 2\n' +
' 2. Child 2\n' +
' 3. Child 3\n' +
'2. Parent sibling'
)
})

0 comments on commit 5dbb194

Please sign in to comment.