From 9acde660827d0ae4953ed89ea4bd677f8cf55119 Mon Sep 17 00:00:00 2001
From: James Adam <jcadam@endor.adam>
Date: Mon, 8 Jun 2015 18:24:25 -0400
Subject: [PATCH] Updated to cmark 0.20.0

---
 blocks.c         | 318 ++++++++++++++++++---------------
 buffer.c         | 217 +++++++++++------------
 buffer.h         |  94 ++++------
 chunk.h          |  33 ++--
 cmark.c          |   2 +-
 cmark.h          |   2 +-
 commonmark.c     |  52 +++---
 commonmark.go    |   2 +-
 config.h         |   2 +
 houdini.h        |  19 +-
 houdini_href_e.c |   4 +-
 houdini_html_e.c |   6 +-
 houdini_html_u.c |  53 +++---
 html.c           |  35 ++--
 inlines.c        | 147 ++++++++--------
 inlines.h        |   6 +-
 iterator.c       |   8 +-
 node.c           |  27 +--
 node.h           |   4 +-
 parser.h         |   6 +-
 references.c     |   4 +-
 references.h     |   4 +-
 scanners.c       | 444 +++++++++++++++++++++++++++++------------------
 scanners.h       |  30 ++--
 scanners.re      |  82 ++++-----
 utf8.c           |  20 +--
 utf8.h           |   6 +-
 xml.c            |  15 +-
 28 files changed, 899 insertions(+), 743 deletions(-)

diff --git a/blocks.c b/blocks.c
index a15f819..a3ac712 100644
--- a/blocks.c
+++ b/blocks.c
@@ -18,13 +18,19 @@
 #define CODE_INDENT 4
 #define peek_at(i, n) (i)->data[n]
 
+static inline bool
+S_is_line_end_char(char c)
+{
+	return (c == '\n' || c == '\r');
+}
+
 static void
 S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
               bool eof);
 
 static void
 S_process_line(cmark_parser *parser, const unsigned char *buffer,
-               size_t bytes);
+               bufsize_t bytes);
 
 static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
 {
@@ -63,6 +69,10 @@ cmark_parser *cmark_parser_new(int options)
 	parser->root = document;
 	parser->current = document;
 	parser->line_number = 0;
+	parser->offset = 0;
+	parser->first_nonspace = 0;
+	parser->indent = 0;
+	parser->blank = false;
 	parser->curline = line;
 	parser->last_line_length = 0;
 	parser->linebuf = buf;
@@ -85,10 +95,11 @@ static cmark_node*
 finalize(cmark_parser *parser, cmark_node* b);
 
 // Returns true if line has only space characters, else false.
-static bool is_blank(cmark_strbuf *s, int offset)
+static bool is_blank(cmark_strbuf *s, bufsize_t offset)
 {
 	while (offset < s->size) {
 		switch (s->ptr[offset]) {
+		case '\r':
 		case '\n':
 			return true;
 		case ' ':
@@ -117,7 +128,7 @@ static inline bool accepts_lines(cmark_node_type block_type)
 	        block_type == NODE_CODE_BLOCK);
 }
 
-static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
+static void add_line(cmark_node* node, cmark_chunk *ch, bufsize_t offset)
 {
 	assert(node->open);
 	cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset);
@@ -125,12 +136,13 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
 
 static void remove_trailing_blank_lines(cmark_strbuf *ln)
 {
-	int i;
+	bufsize_t i;
+	unsigned char c;
 
 	for (i = ln->size - 1; i >= 0; --i) {
-		unsigned char c = ln->ptr[i];
+		c = ln->ptr[i];
 
-		if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
+		if (c != ' ' && c != '\t' && !S_is_line_end_char(c))
 			break;
 	}
 
@@ -139,9 +151,16 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln)
 		return;
 	}
 
-	i = cmark_strbuf_strchr(ln, '\n', i);
-	if (i >= 0)
+
+	for(; i < ln->size; ++i) {
+		c = ln->ptr[i];
+
+		if (!S_is_line_end_char(c))
+			continue;
+
 		cmark_strbuf_truncate(ln, i);
+		break;
+	}
 }
 
 // Check to see if a node ends with a blank line, descending
@@ -185,8 +204,7 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr)
 static cmark_node*
 finalize(cmark_parser *parser, cmark_node* b)
 {
-	int firstlinelen;
-	int pos;
+	bufsize_t pos;
 	cmark_node* item;
 	cmark_node* subitem;
 	cmark_node* parent;
@@ -204,9 +222,11 @@ finalize(cmark_parser *parser, cmark_node* b)
 	           (b->type == NODE_CODE_BLOCK && b->as.code.fenced) ||
 	           (b->type == NODE_HEADER && b->as.header.setext)) {
 		b->end_line = parser->line_number;
-		b->end_column = parser->curline->size -
-		                (parser->curline->ptr[parser->curline->size - 1] == '\n' ?
-		                 1 : 0);
+		b->end_column = parser->curline->size;
+		if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\n')
+			b->end_column -= 1;
+		if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\r')
+			b->end_column -= 1;
 	} else {
 		b->end_line = parser->line_number - 1;
 		b->end_column = parser->last_line_length;
@@ -232,19 +252,27 @@ finalize(cmark_parser *parser, cmark_node* b)
 		} else {
 
 			// first line of contents becomes info
-			firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0);
+			for (pos = 0; pos < b->string_content.size; ++pos) {
+				if (S_is_line_end_char(b->string_content.ptr[pos]))
+					break;
+			}
+			assert(pos < b->string_content.size);
 
 			cmark_strbuf tmp = GH_BUF_INIT;
 			houdini_unescape_html_f(
 			    &tmp,
 			    b->string_content.ptr,
-			    firstlinelen
+			    pos
 			);
 			cmark_strbuf_trim(&tmp);
 			cmark_strbuf_unescape(&tmp);
 			b->as.code.info = cmark_chunk_buf_detach(&tmp);
 
-			cmark_strbuf_drop(&b->string_content, firstlinelen + 1);
+			if (b->string_content.ptr[pos] == '\r')
+				pos += 1;
+			if (b->string_content.ptr[pos] == '\n')
+				pos += 1;
+			cmark_strbuf_drop(&b->string_content, pos);
 		}
 		b->as.code.literal = cmark_chunk_buf_detach(&b->string_content);
 		break;
@@ -339,10 +367,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int o
 // Attempts to parse a list item marker (bullet or enumerated).
 // On success, returns length of the marker, and populates
 // data with the details.  On failure, returns 0.
-static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr)
+static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr)
 {
 	unsigned char c;
-	int startpos;
+	bufsize_t startpos;
 	cmark_list *data;
 
 	startpos = pos;
@@ -467,27 +495,39 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
 	const unsigned char *end = buffer + len;
 
 	while (buffer < end) {
-		const unsigned char *eol
-		    = (const unsigned char *)memchr(buffer, '\n',
-		                                    end - buffer);
+		const unsigned char *eol;
 		size_t line_len;
+		bufsize_t bufsize;
+
+		for (eol = buffer; eol < end; ++eol) {
+			if (S_is_line_end_char(*eol))
+				break;
+		}
+		if (eol >= end)
+			eol = NULL;
 
 		if (eol) {
-			line_len = eol + 1 - buffer;
+			if (eol < end && *eol == '\r')
+				eol++;
+			if (eol < end && *eol == '\n')
+				eol++;
+			line_len = eol - buffer;
 		} else if (eof) {
 			line_len = end - buffer;
 		} else {
-			cmark_strbuf_put(parser->linebuf, buffer, end - buffer);
+			bufsize = cmark_strbuf_check_bufsize(end - buffer);
+			cmark_strbuf_put(parser->linebuf, buffer, bufsize);
 			break;
 		}
 
+		bufsize = cmark_strbuf_check_bufsize(line_len);
 		if (parser->linebuf->size > 0) {
-			cmark_strbuf_put(parser->linebuf, buffer, line_len);
+			cmark_strbuf_put(parser->linebuf, buffer, bufsize);
 			S_process_line(parser, parser->linebuf->ptr,
 			               parser->linebuf->size);
 			cmark_strbuf_clear(parser->linebuf);
 		} else {
-			S_process_line(parser, buffer, line_len);
+			S_process_line(parser, buffer, bufsize);
 		}
 
 		buffer += line_len;
@@ -496,7 +536,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
 
 static void chop_trailing_hashtags(cmark_chunk *ch)
 {
-	int n, orig_n;
+	bufsize_t n, orig_n;
 
 	cmark_chunk_rtrim(ch);
 	orig_n = n = ch->len - 1;
@@ -513,29 +553,42 @@ static void chop_trailing_hashtags(cmark_chunk *ch)
 }
 
 static void
-S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
+S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input)
+{
+	parser->first_nonspace = parser->offset;
+	while (peek_at(input, parser->first_nonspace) == ' ') {
+		parser->first_nonspace++;
+	}
+
+	parser->indent = parser->first_nonspace - parser->offset;
+	parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace));
+}
+
+static void
+S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes)
 {
 	cmark_node* last_matched_container;
-	int offset = 0;
-	int matched = 0;
+	bufsize_t matched = 0;
 	int lev = 0;
 	int i;
 	cmark_list *data = NULL;
 	bool all_matched = true;
 	cmark_node* container;
-	bool blank = false;
-	int first_nonspace;
-	int indent;
+	bool indented;
 	cmark_chunk input;
 	bool maybe_lazy;
 
 	utf8proc_detab(parser->curline, buffer, bytes);
+	parser->offset = 0;
+	parser->blank = false;
 
 	// Add a newline to the end if not present:
 	// TODO this breaks abstraction:
-	if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
+	if (parser->curline->size > 0 &&
+	    !S_is_line_end_char(parser->curline->ptr[parser->curline->size - 1])) {
 		cmark_strbuf_putc(parser->curline, '\n');
 	}
+
 	input.data = parser->curline->ptr;
 	input.len = parser->curline->size;
 
@@ -550,32 +603,26 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	while (container->last_child && container->last_child->open) {
 		container = container->last_child;
 
-		first_nonspace = offset;
-		while (peek_at(&input, first_nonspace) == ' ') {
-			first_nonspace++;
-		}
-
-		indent = first_nonspace - offset;
-		blank = peek_at(&input, first_nonspace) == '\n';
+		S_find_first_nonspace(parser, &input);
 
 		if (container->type == NODE_BLOCK_QUOTE) {
-			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
+			matched = parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>';
 			if (matched) {
-				offset = first_nonspace + 1;
-				if (peek_at(&input, offset) == ' ')
-					offset++;
+				parser->offset = parser->first_nonspace + 1;
+				if (peek_at(&input, parser->offset) == ' ')
+					parser->offset++;
 			} else {
 				all_matched = false;
 			}
 
 		} else if (container->type == NODE_ITEM) {
 
-			if (indent >= container->as.list.marker_offset +
+			if (parser->indent >= container->as.list.marker_offset +
 			    container->as.list.padding) {
-				offset += container->as.list.marker_offset +
+				parser->offset += container->as.list.marker_offset +
 				          container->as.list.padding;
-			} else if (blank) {
-				offset = first_nonspace;
+			} else if (parser->blank) {
+				parser->offset = parser->first_nonspace;
 			} else {
 				all_matched = false;
 			}
@@ -583,34 +630,34 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		} else if (container->type == NODE_CODE_BLOCK) {
 
 			if (!container->as.code.fenced) { // indented
-				if (indent >= CODE_INDENT) {
-					offset += CODE_INDENT;
-				} else if (blank) {
-					offset = first_nonspace;
+				if (parser->indent >= CODE_INDENT) {
+					parser->offset += CODE_INDENT;
+				} else if (parser->blank) {
+					parser->offset = parser->first_nonspace;
 				} else {
 					all_matched = false;
 				}
 			} else { // fenced
 				matched = 0;
-				if (indent <= 3 &&
-				    (peek_at(&input, first_nonspace) ==
+				if (parser->indent <= 3 &&
+				    (peek_at(&input, parser->first_nonspace) ==
 				     container->as.code.fence_char)) {
 					matched = scan_close_code_fence(&input,
-					                                first_nonspace);
+					                                parser->first_nonspace);
 				}
 				if (matched >= container->as.code.fence_length) {
 					// closing fence - and since we're at
 					// the end of a line, we can return:
 					all_matched = false;
-					offset += matched;
+					parser->offset += matched;
 					parser->current = finalize(parser, container);
 					goto finished;
 				} else {
-					// skip opt. spaces of fence offset
+					// skip optional leading spaces, up to the fence's offset
 					i = container->as.code.fence_offset;
 					while (i > 0 &&
-					       peek_at(&input, offset) == ' ') {
-						offset++;
+					       peek_at(&input, parser->offset) == ' ') {
+						parser->offset++;
 						i--;
 					}
 				}
@@ -622,13 +669,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 
 		} else if (container->type == NODE_HTML) {
 
-			if (blank) {
+			if (parser->blank) {
 				all_matched = false;
 			}
 
 		} else if (container->type == NODE_PARAGRAPH) {
 
-			if (blank) {
+			if (parser->blank) {
 				all_matched = false;
 			}
 
@@ -643,7 +690,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	last_matched_container = container;
 
 	// check to see if we've hit 2nd blank line, break out of list:
-	if (blank && container->last_line_blank) {
+	if (parser->blank && container->last_line_blank) {
 		break_out_of_lists(parser, &container);
 	}
 
@@ -652,40 +699,23 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	while (container->type != NODE_CODE_BLOCK &&
 	       container->type != NODE_HTML) {
 
-		first_nonspace = offset;
-		while (peek_at(&input, first_nonspace) == ' ')
-			first_nonspace++;
-
-		indent = first_nonspace - offset;
-		blank = peek_at(&input, first_nonspace) == '\n';
-
-		if (indent >= CODE_INDENT) {
-			if (!maybe_lazy && !blank) {
-				offset += CODE_INDENT;
-				container = add_child(parser, container, NODE_CODE_BLOCK, offset + 1);
-				container->as.code.fenced = false;
-				container->as.code.fence_char = 0;
-				container->as.code.fence_length = 0;
-				container->as.code.fence_offset = 0;
-				container->as.code.info = cmark_chunk_literal("");
-			} else { // indent > 4 in lazy line
-				break;
-			}
+		S_find_first_nonspace(parser, &input);
+		indented = parser->indent >= CODE_INDENT;
 
-		} else if (peek_at(&input, first_nonspace) == '>') {
+		if (!indented && peek_at(&input, parser->first_nonspace) == '>') {
 
-			offset = first_nonspace + 1;
+			parser->offset = parser->first_nonspace + 1;
 			// optional following character
-			if (peek_at(&input, offset) == ' ')
-				offset++;
-			container = add_child(parser, container, NODE_BLOCK_QUOTE, offset + 1);
+			if (peek_at(&input, parser->offset) == ' ')
+				parser->offset++;
+			container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->offset + 1);
 
-		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
+		} else if (!indented && (matched = scan_atx_header_start(&input, parser->first_nonspace))) {
 
-			offset = first_nonspace + matched;
-			container = add_child(parser, container, NODE_HEADER, offset + 1);
+			parser->offset = parser->first_nonspace + matched;
+			container = add_child(parser, container, NODE_HEADER, parser->offset + 1);
 
-			int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace);
+			bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace);
 			int level = 0;
 
 			while (peek_at(&input, hashpos) == '#') {
@@ -695,78 +725,95 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			container->as.header.level = level;
 			container->as.header.setext = false;
 
-		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
+		} else if (!indented && (matched = scan_open_code_fence(&input, parser->first_nonspace))) {
 
-			container = add_child(parser, container, NODE_CODE_BLOCK, first_nonspace + 1);
+			container = add_child(parser, container, NODE_CODE_BLOCK, parser->first_nonspace + 1);
 			container->as.code.fenced = true;
-			container->as.code.fence_char = peek_at(&input, first_nonspace);
+			container->as.code.fence_char = peek_at(&input, parser->first_nonspace);
 			container->as.code.fence_length = matched;
-			container->as.code.fence_offset = first_nonspace - offset;
+			container->as.code.fence_offset = parser->first_nonspace - parser->offset;
 			container->as.code.info = cmark_chunk_literal("");
-			offset = first_nonspace + matched;
+			parser->offset = parser->first_nonspace + matched;
 
-		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
+		} else if (!indented && (matched = scan_html_block_tag(&input, parser->first_nonspace))) {
 
-			container = add_child(parser, container, NODE_HTML, first_nonspace + 1);
-			// note, we don't adjust offset because the tag is part of the text
+			container = add_child(parser, container, NODE_HTML, parser->first_nonspace + 1);
+			// note, we don't adjust parser->offset because the tag is part of the text
 
-		} else if (container->type == NODE_PARAGRAPH &&
-		           (lev = scan_setext_header_line(&input, first_nonspace)) &&
+		} else if (!indented &&
+		           container->type == NODE_PARAGRAPH &&
+		           (lev = scan_setext_header_line(&input, parser->first_nonspace)) &&
 		           // check that there is only one line in the paragraph:
-		           cmark_strbuf_strrchr(&container->string_content, '\n',
-		                                cmark_strbuf_len(&container->string_content) - 2) < 0) {
+		           (cmark_strbuf_strrchr(&container->string_content, '\n',
+		                                 cmark_strbuf_len(&container->string_content) - 2) < 0)) {
 
 			container->type = NODE_HEADER;
 			container->as.header.level = lev;
 			container->as.header.setext = true;
-			offset = input.len - 1;
+			parser->offset = input.len - 1;
 
-		} else if (!(container->type == NODE_PARAGRAPH && !all_matched) &&
-		           (matched = scan_hrule(&input, first_nonspace))) {
+		} else if (!indented &&
+		           !(container->type == NODE_PARAGRAPH &&
+		             !all_matched) &&
+		           (matched = scan_hrule(&input, parser->first_nonspace))) {
 
 			// it's only now that we know the line is not part of a setext header:
-			container = add_child(parser, container, NODE_HRULE, first_nonspace + 1);
+			container = add_child(parser, container, NODE_HRULE, parser->first_nonspace + 1);
 			container = finalize(parser, container);
-			offset = input.len - 1;
+			parser->offset = input.len - 1;
 
-		} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {
+		} else if ((matched = parse_list_marker(&input, parser->first_nonspace, &data)) &&
+		           (!indented || container->type == NODE_LIST)) {
+			// Note that we can have new list items starting with >= 4
+			// spaces indent, as long as the list container is still open.
 
 			// compute padding:
-			offset = first_nonspace + matched;
+			parser->offset = parser->first_nonspace + matched;
 			i = 0;
-			while (i <= 5 && peek_at(&input, offset + i) == ' ') {
+			while (i <= 5 && peek_at(&input, parser->offset + i) == ' ') {
 				i++;
 			}
 			// i = number of spaces after marker, up to 5
-			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
+			if (i >= 5 || i < 1 ||
+			    S_is_line_end_char(peek_at(&input, parser->offset))) {
 				data->padding = matched + 1;
 				if (i > 0) {
-					offset += 1;
+					parser->offset += 1;
 				}
 			} else {
 				data->padding = matched + i;
-				offset += i;
+				parser->offset += i;
 			}
 
 			// check container; if it's a list, see if this list item
 			// can continue the list; otherwise, create a list container.
 
-			data->marker_offset = indent;
+			data->marker_offset = parser->indent;
 
 			if (container->type != NODE_LIST ||
 			    !lists_match(&container->as.list, data)) {
 				container = add_child(parser, container, NODE_LIST,
-				                      first_nonspace + 1);
+				                      parser->first_nonspace + 1);
 
 				memcpy(&container->as.list, data, sizeof(*data));
 			}
 
 			// add the list item
 			container = add_child(parser, container, NODE_ITEM,
-			                      first_nonspace + 1);
+			                      parser->first_nonspace + 1);
 			/* TODO: static */
 			memcpy(&container->as.list, data, sizeof(*data));
 			free(data);
+
+		} else if (indented && !maybe_lazy && !parser->blank) {
+			parser->offset += CODE_INDENT;
+			container = add_child(parser, container, NODE_CODE_BLOCK, parser->offset + 1);
+			container->as.code.fenced = false;
+			container->as.code.fence_char = 0;
+			container->as.code.fence_length = 0;
+			container->as.code.fence_offset = 0;
+			container->as.code.info = cmark_chunk_literal("");
+
 		} else {
 			break;
 		}
@@ -778,17 +825,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		maybe_lazy = false;
 	}
 
-	// what remains at offset is a text line.  add the text to the
+	// what remains at parser->offset is a text line.  add the text to the
 	// appropriate container.
 
-	first_nonspace = offset;
-	while (peek_at(&input, first_nonspace) == ' ')
-		first_nonspace++;
+	S_find_first_nonspace(parser, &input);
 
-	indent = first_nonspace - offset;
-	blank = peek_at(&input, first_nonspace) == '\n';
-
-	if (blank && container->last_child) {
+	if (parser->blank && container->last_child) {
 		container->last_child->last_line_blank = true;
 	}
 
@@ -796,7 +838,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	// and we don't count blanks in fenced code for purposes of tight/loose
 	// lists or breaking out of lists.  we also don't set last_line_blank
 	// on an empty list item.
-	container->last_line_blank = (blank &&
+	container->last_line_blank = (parser->blank &&
 	                              container->type != NODE_BLOCK_QUOTE &&
 	                              container->type != NODE_HEADER &&
 	                              !(container->type == NODE_CODE_BLOCK &&
@@ -813,11 +855,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 
 	if (parser->current != last_matched_container &&
 	    container == last_matched_container &&
-	    !blank &&
+	    !parser->blank &&
 	    parser->current->type == NODE_PARAGRAPH &&
 	    cmark_strbuf_len(&parser->current->string_content) > 0) {
 
-		add_line(parser->current, &input, offset);
+		add_line(parser->current, &input, parser->offset);
 
 	} else { // not a lazy continuation
 
@@ -830,9 +872,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		if (container->type == NODE_CODE_BLOCK ||
 		    container->type == NODE_HTML) {
 
-			add_line(container, &input, offset);
+			add_line(container, &input, parser->offset);
 
-		} else if (blank) {
+		} else if (parser->blank) {
 
 			// ??? do nothing
 
@@ -842,22 +884,26 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			    container->as.header.setext == false) {
 				chop_trailing_hashtags(&input);
 			}
-			add_line(container, &input, first_nonspace);
+			add_line(container, &input, parser->first_nonspace);
 
 		} else {
 			// create paragraph container for line
-			container = add_child(parser, container, NODE_PARAGRAPH, first_nonspace + 1);
-			add_line(container, &input, first_nonspace);
+			container = add_child(parser, container, NODE_PARAGRAPH, parser->first_nonspace + 1);
+			add_line(container, &input, parser->first_nonspace);
 
 		}
 
 		parser->current = container;
 	}
 finished:
-	parser->last_line_length = parser->curline->size -
-	                           (parser->curline->ptr[parser->curline->size - 1] == '\n' ?
-	                            1 : 0);
-	;
+	parser->last_line_length = parser->curline->size;
+	if (parser->last_line_length &&
+	    parser->curline->ptr[parser->last_line_length - 1] == '\n')
+		parser->last_line_length -= 1;
+	if (parser->last_line_length &&
+	    parser->curline->ptr[parser->last_line_length - 1] == '\r')
+		parser->last_line_length -= 1;
+
 	cmark_strbuf_clear(parser->curline);
 
 }
diff --git a/buffer.c b/buffer.c
index 5ec8b49..7d16af8 100644
--- a/buffer.c
+++ b/buffer.c
@@ -4,6 +4,7 @@
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 
 #include "config.h"
 #include "cmark_ctype.h"
@@ -13,83 +14,88 @@
  * assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
  */
 unsigned char cmark_strbuf__initbuf[1];
-unsigned char cmark_strbuf__oom[1];
-
-#define ENSURE_SIZE(b, d)					\
-	if ((d) > buf->asize && cmark_strbuf_grow(b, (d)) < 0)	\
-		return -1;
 
 #ifndef MIN
 #define MIN(x,y)  ((x<y) ? x : y)
 #endif
 
-void cmark_strbuf_init(cmark_strbuf *buf, int initial_size)
+void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size)
 {
 	buf->asize = 0;
 	buf->size = 0;
 	buf->ptr = cmark_strbuf__initbuf;
 
-	if (initial_size)
+	if (initial_size > 0)
 		cmark_strbuf_grow(buf, initial_size);
 }
 
-int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom)
+void cmark_strbuf_overflow_err() {
+	fprintf(stderr, "String buffer overflow");
+	abort();
+}
+
+static inline void
+S_strbuf_grow_by(cmark_strbuf *buf, size_t add) {
+	size_t target_size = (size_t)buf->size + add;
+
+	if (target_size < add             /* Integer overflow. */
+	    || target_size > BUFSIZE_MAX  /* Truncation overflow. */
+	) {
+		cmark_strbuf_overflow_err();
+		return; /* unreachable */
+	}
+
+	if ((bufsize_t)target_size >= buf->asize)
+		cmark_strbuf_grow(buf, (bufsize_t)target_size);
+}
+
+void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size)
 {
 	unsigned char *new_ptr;
-	int new_size;
-
-	if (buf->ptr == cmark_strbuf__oom)
-		return -1;
 
-	if (target_size <= buf->asize)
-		return 0;
+	if (target_size < buf->asize)
+		return;
 
 	if (buf->asize == 0) {
-		new_size = target_size;
 		new_ptr = NULL;
 	} else {
-		new_size = buf->asize;
 		new_ptr = buf->ptr;
 	}
 
-	/* grow the buffer size by 1.5, until it's big enough
-	 * to fit our target size */
-	while (new_size < target_size)
-		new_size = (new_size << 1) - (new_size >> 1);
+	/* Oversize the buffer by 50% to guarantee amortized linear time
+	 * complexity on append operations. */
+	size_t new_size = (size_t)target_size + (size_t)target_size / 2;
+
+	/* Account for terminating null byte. */
+	new_size += 1;
 
 	/* round allocation up to multiple of 8 */
 	new_size = (new_size + 7) & ~7;
 
+	if (new_size < (size_t)target_size  /* Integer overflow. */
+	    || new_size > BUFSIZE_MAX       /* Truncation overflow. */
+	) {
+		if (target_size >= BUFSIZE_MAX) {
+			/* No space for terminating null byte. */
+			cmark_strbuf_overflow_err();
+			return; /* unreachable */
+		}
+		/* Oversize by the maximum possible amount. */
+		new_size = BUFSIZE_MAX;
+	}
+
 	new_ptr = (unsigned char *)realloc(new_ptr, new_size);
 
 	if (!new_ptr) {
-		if (mark_oom)
-			buf->ptr = cmark_strbuf__oom;
-		return -1;
+		perror("realloc in cmark_strbuf_grow");
+		abort();
 	}
 
-	buf->asize = new_size;
+	buf->asize = (bufsize_t)new_size;
 	buf->ptr   = new_ptr;
-
-	/* truncate the existing buffer size if necessary */
-	if (buf->size >= buf->asize)
-		buf->size = buf->asize - 1;
-	buf->ptr[buf->size] = '\0';
-
-	return 0;
-}
-
-int cmark_strbuf_grow(cmark_strbuf *buf, int target_size)
-{
-	return cmark_strbuf_try_grow(buf, target_size, true);
-}
-
-bool cmark_strbuf_oom(const cmark_strbuf *buf)
-{
-	return (buf->ptr == cmark_strbuf__oom);
 }
 
-size_t cmark_strbuf_len(const cmark_strbuf *buf)
+bufsize_t cmark_strbuf_len(const cmark_strbuf *buf)
 {
 	return buf->size;
 }
@@ -98,7 +104,7 @@ void cmark_strbuf_free(cmark_strbuf *buf)
 {
 	if (!buf) return;
 
-	if (buf->ptr != cmark_strbuf__initbuf && buf->ptr != cmark_strbuf__oom)
+	if (buf->ptr != cmark_strbuf__initbuf)
 		free(buf->ptr);
 
 	cmark_strbuf_init(buf, 0);
@@ -112,106 +118,106 @@ void cmark_strbuf_clear(cmark_strbuf *buf)
 		buf->ptr[0] = '\0';
 }
 
-int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len)
+void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len)
 {
 	if (len <= 0 || data == NULL) {
 		cmark_strbuf_clear(buf);
 	} else {
 		if (data != buf->ptr) {
-			ENSURE_SIZE(buf, len + 1);
+			if (len >= buf->asize)
+				cmark_strbuf_grow(buf, len);
 			memmove(buf->ptr, data, len);
 		}
 		buf->size = len;
 		buf->ptr[buf->size] = '\0';
 	}
-	return 0;
 }
 
-int cmark_strbuf_sets(cmark_strbuf *buf, const char *string)
+void cmark_strbuf_sets(cmark_strbuf *buf, const char *string)
 {
-	return cmark_strbuf_set(buf,
-	                        (const unsigned char *)string,
-	                        string ? strlen(string) : 0);
+	cmark_strbuf_set(buf, (const unsigned char *)string,
+	                 string ? cmark_strbuf_safe_strlen(string) : 0);
 }
 
-int cmark_strbuf_putc(cmark_strbuf *buf, int c)
+void cmark_strbuf_putc(cmark_strbuf *buf, int c)
 {
-	ENSURE_SIZE(buf, buf->size + 2);
+	S_strbuf_grow_by(buf, 1);
 	buf->ptr[buf->size++] = c;
 	buf->ptr[buf->size] = '\0';
-	return 0;
 }
 
-int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len)
+void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len)
 {
 	if (len <= 0)
-		return 0;
+		return;
 
-	ENSURE_SIZE(buf, buf->size + len + 1);
+	S_strbuf_grow_by(buf, len);
 	memmove(buf->ptr + buf->size, data, len);
 	buf->size += len;
 	buf->ptr[buf->size] = '\0';
-	return 0;
 }
 
-int cmark_strbuf_puts(cmark_strbuf *buf, const char *string)
+void cmark_strbuf_puts(cmark_strbuf *buf, const char *string)
 {
-	return cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string));
+	cmark_strbuf_put(buf, (const unsigned char *)string,
+			 cmark_strbuf_safe_strlen(string));
 }
 
-int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap)
+void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap)
 {
-	const int expected_size = buf->size + (strlen(format) * 2);
-	int len;
-
-	ENSURE_SIZE(buf, expected_size);
+	size_t expected_size = strlen(format);
+	if (expected_size <= SIZE_MAX / 2)
+		expected_size *= 2;
+	S_strbuf_grow_by(buf, expected_size);
 
 	while (1) {
 		va_list args;
 		va_copy(args, ap);
 
-		len = vsnprintf(
+		int len = vsnprintf(
 		          (char *)buf->ptr + buf->size,
 		          buf->asize - buf->size,
 		          format, args
 		      );
+#ifndef HAVE_C99_SNPRINTF
+		// Assume we're on Windows.
+		if (len < 0) {
+			len = _vscprintf(format, args);
+		}
+#endif
 
 		va_end(args);
 
 		if (len < 0) {
-			free(buf->ptr);
-			buf->ptr = cmark_strbuf__oom;
-			return -1;
+			perror("vsnprintf in cmark_strbuf_vprintf");
+			abort();
 		}
 
-		if (len + 1 <= buf->asize - buf->size) {
+		if ((size_t)len < (size_t)(buf->asize - buf->size)) {
 			buf->size += len;
 			break;
 		}
 
-		ENSURE_SIZE(buf, buf->size + len + 1);
+		S_strbuf_grow_by(buf, len);
 	}
-
-	return 0;
 }
 
-int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
+void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
 {
-	int r;
 	va_list ap;
 
 	va_start(ap, format);
-	r = cmark_strbuf_vprintf(buf, format, ap);
+	cmark_strbuf_vprintf(buf, format, ap);
 	va_end(ap);
-
-	return r;
 }
 
-void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf)
+void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf)
 {
-	int copylen;
+	bufsize_t copylen;
 
-	assert(data && datasize && buf);
+	assert(buf);
+	if (!data || datasize <= 0)
+		return;
 
 	data[0] = '\0';
 
@@ -236,7 +242,7 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf)
 {
 	unsigned char *data = buf->ptr;
 
-	if (buf->asize == 0 || buf->ptr == cmark_strbuf__oom) {
+	if (buf->asize == 0) {
 		/* return an empty string */
 		return (unsigned char *)calloc(1, 1);
 	}
@@ -245,22 +251,6 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf)
 	return data;
 }
 
-void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize)
-{
-	cmark_strbuf_free(buf);
-
-	if (ptr) {
-		buf->ptr = ptr;
-		buf->size = strlen((char *)ptr);
-		if (asize)
-			buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
-		else /* pass 0 to fall back on strlen + 1 */
-			buf->asize = buf->size + 1;
-	} else {
-		cmark_strbuf_grow(buf, asize);
-	}
-}
-
 int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b)
 {
 	int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
@@ -268,20 +258,28 @@ int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b)
 	       (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
 }
 
-int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos)
+bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos)
 {
+	if (pos >= buf->size)
+		return -1;
+	if (pos < 0)
+		pos = 0;
+
 	const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
 	if (!p)
 		return -1;
 
-	return (int)(p - (const unsigned char *)buf->ptr);
+	return (bufsize_t)(p - (const unsigned char *)buf->ptr);
 }
 
-int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos)
+bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos)
 {
-	int i;
+	if (pos < 0 || buf->size == 0)
+		return -1;
+	if (pos >= buf->size)
+		pos = buf->size - 1;
 
-	for (i = pos; i >= 0; i--) {
+	for (bufsize_t i = pos; i >= 0; i--) {
 		if (buf->ptr[i] == (unsigned char) c)
 			return i;
 	}
@@ -289,17 +287,22 @@ int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos)
 	return -1;
 }
 
-void cmark_strbuf_truncate(cmark_strbuf *buf, int len)
+void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len)
 {
+	if (len < 0)
+		len = 0;
+
 	if (len < buf->size) {
 		buf->size = len;
 		buf->ptr[buf->size] = '\0';
 	}
 }
 
-void cmark_strbuf_drop(cmark_strbuf *buf, int n)
+void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n)
 {
 	if (n > 0) {
+		if (n > buf->size)
+			n = buf->size;
 		buf->size = buf->size - n;
 		if (buf->size)
 			memmove(buf->ptr, buf->ptr + n, buf->size);
@@ -325,7 +328,7 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf)
 
 void cmark_strbuf_trim(cmark_strbuf *buf)
 {
-	int i = 0;
+	bufsize_t i = 0;
 
 	if (!buf->size)
 		return;
@@ -343,7 +346,7 @@ void cmark_strbuf_trim(cmark_strbuf *buf)
 void cmark_strbuf_normalize_whitespace(cmark_strbuf *s)
 {
 	bool last_char_was_space = false;
-	int r, w;
+	bufsize_t r, w;
 
 	for (r = 0, w = 0; r < s->size; ++r) {
 		switch (s->ptr[r]) {
@@ -368,7 +371,7 @@ void cmark_strbuf_normalize_whitespace(cmark_strbuf *s)
 // Destructively unescape a string: remove backslashes before punctuation chars.
 extern void cmark_strbuf_unescape(cmark_strbuf *buf)
 {
-	int r, w;
+	bufsize_t r, w;
 
 	for (r = 0, w = 0; r < buf->size; ++r) {
 		if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1]))
diff --git a/buffer.h b/buffer.h
index fb9f910..babd051 100644
--- a/buffer.h
+++ b/buffer.h
@@ -3,22 +3,25 @@
 
 #include <stddef.h>
 #include <stdarg.h>
+#include <string.h>
+#include <limits.h>
 #include "config.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+typedef int bufsize_t;
+
 typedef struct {
 	unsigned char *ptr;
-	int asize, size;
+	bufsize_t asize, size;
 } cmark_strbuf;
 
 extern unsigned char cmark_strbuf__initbuf[];
 
-extern unsigned char cmark_strbuf__oom[];
-
 #define GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 }
+#define BUFSIZE_MAX INT_MAX
 
 /**
  * Initialize a cmark_strbuf structure.
@@ -26,51 +29,22 @@ extern unsigned char cmark_strbuf__oom[];
  * For the cases where GH_BUF_INIT cannot be used to do static
  * initialization.
  */
-void cmark_strbuf_init(cmark_strbuf *buf, int initial_size);
-
-/**
- * Attempt to grow the buffer to hold at least `target_size` bytes.
- *
- * If the allocation fails, this will return an error.  If mark_oom is true,
- * this will mark the buffer as invalid for future operations; if false,
- * existing buffer content will be preserved, but calling code must handle
- * that buffer was not expanded.
- */
-int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom);
+void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size);
 
 /**
  * Grow the buffer to hold at least `target_size` bytes.
- *
- * If the allocation fails, this will return an error and the buffer will be
- * marked as invalid for future operations, invaliding contents.
- *
- * @return 0 on success or -1 on failure
  */
-int cmark_strbuf_grow(cmark_strbuf *buf, int target_size);
+void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
 
 void cmark_strbuf_free(cmark_strbuf *buf);
 void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
 
-/**
- * Test if there have been any reallocation failures with this cmark_strbuf.
- *
- * Any function that writes to a cmark_strbuf can fail due to memory allocation
- * issues.  If one fails, the cmark_strbuf will be marked with an OOM error and
- * further calls to modify the buffer will fail.  Check cmark_strbuf_oom() at the
- * end of your sequence and it will be true if you ran out of memory at any
- * point with that buffer.
- *
- * @return false if no error, true if allocation error
- */
-bool cmark_strbuf_oom(const cmark_strbuf *buf);
-
-size_t cmark_strbuf_len(const cmark_strbuf *buf);
+bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
 
 int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
 
-void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize);
 unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
-void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf);
+void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf);
 
 static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
 {
@@ -79,33 +53,41 @@ static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
 
 #define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
 
-/*
- * Functions below that return int value error codes will return 0 on
- * success or -1 on failure (which generally means an allocation failed).
- * Using a cmark_strbuf where the allocation has failed with result in -1 from
- * all further calls using that buffer.  As a result, you can ignore the
- * return code of these functions and call them in a series then just call
- * cmark_strbuf_oom at the end.
- */
-int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len);
-int cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
-int cmark_strbuf_putc(cmark_strbuf *buf, int c);
-int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len);
-int cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
-int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
-CMARK_ATTRIBUTE((format (printf, 2, 3)));
-int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap);
+void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len);
+void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
+void cmark_strbuf_putc(cmark_strbuf *buf, int c);
+void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len);
+void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
+void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
+	CMARK_ATTRIBUTE((format (printf, 2, 3)));
+void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap);
 void cmark_strbuf_clear(cmark_strbuf *buf);
 
-int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos);
-int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos);
-void cmark_strbuf_drop(cmark_strbuf *buf, int n);
-void cmark_strbuf_truncate(cmark_strbuf *buf, int len);
+bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
+bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
+void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
+void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
 void cmark_strbuf_rtrim(cmark_strbuf *buf);
 void cmark_strbuf_trim(cmark_strbuf *buf);
 void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
 void cmark_strbuf_unescape(cmark_strbuf *s);
 
+/* Print error and abort. */
+void cmark_strbuf_overflow_err(void);
+
+static inline bufsize_t
+cmark_strbuf_check_bufsize(size_t size) {
+	if (size > BUFSIZE_MAX) {
+		cmark_strbuf_overflow_err();
+	}
+	return (bufsize_t)size;
+}
+
+static inline bufsize_t
+cmark_strbuf_safe_strlen(const char *str) {
+	return cmark_strbuf_check_bufsize(strlen(str));
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/chunk.h b/chunk.h
index 54c4b16..f23a02d 100644
--- a/chunk.h
+++ b/chunk.h
@@ -7,10 +7,12 @@
 #include "cmark_ctype.h"
 #include "buffer.h"
 
+#define CMARK_CHUNK_EMPTY { NULL, 0, 0 }
+
 typedef struct {
 	unsigned char *data;
-	int len;
-	int alloc;  // also implies a NULL-terminated string
+	bufsize_t len;
+	bufsize_t alloc;  // also implies a NULL-terminated string
 } cmark_chunk;
 
 static inline void cmark_chunk_free(cmark_chunk *c)
@@ -49,10 +51,10 @@ static inline void cmark_chunk_trim(cmark_chunk *c)
 	cmark_chunk_rtrim(c);
 }
 
-static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset)
+static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset)
 {
 	const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
-	return p ? (int)(p - ch->data) : ch->len;
+	return p ? (bufsize_t)(p - ch->data) : ch->len;
 }
 
 static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
@@ -64,7 +66,9 @@ static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
 	}
 	str = (unsigned char *)malloc(c->len + 1);
 	if(str != NULL) {
-		memcpy(str, c->data, c->len);
+		if(c->len > 0) {
+			memcpy(str, c->data, c->len);
+		}
 		str[c->len] = 0;
 	}
 	c->data  = str;
@@ -78,19 +82,26 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str)
 	if (c->alloc) {
 		free(c->data);
 	}
-	c->len   = strlen(str);
-	c->data  = (unsigned char *)malloc(c->len + 1);
-	c->alloc = 1;
-	memcpy(c->data, str, c->len + 1);
+	if (str == NULL) {
+		c->len   = 0;
+		c->data  = NULL;
+		c->alloc = 0;
+	} else {
+		c->len   = cmark_strbuf_safe_strlen(str);
+		c->data  = (unsigned char *)malloc(c->len + 1);
+		c->alloc = 1;
+		memcpy(c->data, str, c->len + 1);
+	}
 }
 
 static inline cmark_chunk cmark_chunk_literal(const char *data)
 {
-	cmark_chunk c = {(unsigned char *)data, data ? strlen(data) : 0, 0};
+	bufsize_t len = data ? cmark_strbuf_safe_strlen(data) : 0;
+	cmark_chunk c = {(unsigned char *)data, len, 0};
 	return c;
 }
 
-static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len)
+static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len)
 {
 	cmark_chunk c = {ch->data + pos, len, 0};
 	return c;
diff --git a/cmark.c b/cmark.c
index 79ceabf..35765b1 100644
--- a/cmark.c
+++ b/cmark.c
@@ -9,7 +9,7 @@
 const int cmark_version = CMARK_VERSION;
 const char cmark_version_string[] = CMARK_VERSION_STRING;
 
-char *cmark_markdown_to_html(const char *text, int len, int options)
+char *cmark_markdown_to_html(const char *text, size_t len, int options)
 {
 	cmark_node *doc;
 	char *result;
diff --git a/cmark.h b/cmark.h
index e41d995..a7670e9 100644
--- a/cmark.h
+++ b/cmark.h
@@ -24,7 +24,7 @@ extern "C" {
  * UTF-8-encoded string.
  */
 CMARK_EXPORT
-char *cmark_markdown_to_html(const char *text, int len, int options);
+char *cmark_markdown_to_html(const char *text, size_t len, int options);
 
 /** ## Node Structure
  */
diff --git a/commonmark.c b/commonmark.c
index 805f139..4594748 100644
--- a/commonmark.c
+++ b/commonmark.c
@@ -20,7 +20,7 @@ struct render_state {
 	int column;
 	int width;
 	int need_cr;
-	int last_breakable;
+	bufsize_t last_breakable;
 	bool begin_line;
 	bool no_wrap;
 	bool in_tight_list_item;
@@ -237,28 +237,31 @@ shortest_unused_backtick_sequence(cmark_chunk *code)
 static bool
 is_autolink(cmark_node *node)
 {
-	const char *title;
-	const char *url;
+	cmark_chunk *title;
+	cmark_chunk *url;
+	cmark_node *link_text;
 
 	if (node->type != CMARK_NODE_LINK) {
 		return false;
 	}
 
-	url = cmark_node_get_url(node);
-	if (url == NULL ||
-	    _scan_scheme((unsigned char *)url) == 0) {
+	url = &node->as.link.url;
+	if (url->len == 0 || scan_scheme(url, 0) == 0) {
 		return false;
 	}
 
-	title = cmark_node_get_title(node);
+	title = &node->as.link.title;
 	// if it has a title, we can't treat it as an autolink:
-	if (title != NULL && strlen(title) > 0) {
+	if (title->len > 0) {
 		return false;
 	}
-	cmark_consolidate_text_nodes(node);
-	return (strncmp(url,
-	                (char*)node->as.literal.data,
-	                node->as.literal.len) == 0);
+
+	link_text = node->first_child;
+	cmark_consolidate_text_nodes(link_text);
+	return (url->len == link_text->as.literal.len &&
+	        strncmp((char*)url->data,
+	                (char*)link_text->as.literal.data,
+	                link_text->as.literal.len) == 0);
 }
 
 // if node is a block node, returns node.
@@ -285,11 +288,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 	int numticks;
 	int i;
 	bool entering = (ev_type == CMARK_EVENT_ENTER);
-	const char *info;
-	const char *title;
+	cmark_chunk *info;
+	cmark_chunk *title;
 	cmark_strbuf listmarker = GH_BUF_INIT;
 	char *emph_delim;
-	int marker_width;
+	bufsize_t marker_width;
 
 	// Don't adjust tight list status til we've started the list.
 	// Otherwise we loose the blank line between a paragraph and
@@ -392,12 +395,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 
 	case CMARK_NODE_CODE_BLOCK:
 		blankline(state);
-		info = cmark_node_get_fence_info(node);
+		info = &node->as.code.info;
 		code = &node->as.code.literal;
 		// use indented form if no info, and code doesn't
 		// begin or end with a blank line, and code isn't
 		// first thing in a list item
-		if ((info == NULL || strlen(info) == 0) &&
+		if (info->len == 0 &&
 		    (code->len > 2 &&
 		     !isspace(code->data[0]) &&
 		     !(isspace(code->data[code->len - 1]) &&
@@ -418,7 +421,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 				lit(state, "`", false);
 			}
 			lit(state, " ", false);
-			out(state, cmark_chunk_literal(info), false, LITERAL);
+			out(state, *info, false, LITERAL);
 			cr(state);
 			out(state, node->as.code.literal, false, LITERAL);
 			cr(state);
@@ -538,11 +541,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 				out(state,
 				    cmark_chunk_literal(cmark_node_get_url(node)),
 				    false, URL);
-				title = cmark_node_get_title(node);
-				if (title && strlen(title) > 0) {
+				title = &node->as.link.title;
+				if (title->len > 0) {
 					lit(state, " \"", true);
-					out(state, cmark_chunk_literal(title),
-					    false, TITLE);
+					out(state, *title, false, TITLE);
 					lit(state, "\"", false);
 				}
 				lit(state, ")", false);
@@ -556,10 +558,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 		} else {
 			lit(state, "](", false);
 			out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL);
-			title = cmark_node_get_title(node);
-			if (title && strlen(title) > 0) {
+			title = &node->as.link.title;
+			if (title->len > 0) {
 				lit(state, " \"", true);
-				out(state, cmark_chunk_literal(title), false, TITLE);
+				out(state, *title, false, TITLE);
 				lit(state, "\"", false);
 			}
 			lit(state, ")", false);
diff --git a/commonmark.go b/commonmark.go
index df479ed..aff8eb7 100644
--- a/commonmark.go
+++ b/commonmark.go
@@ -24,7 +24,7 @@ func Md2Html(mdtext string, options int) string {
 		mdtext += "\n"
 	}
 	mdCstr := C.CString(mdtext)
-	strLen := C.int(len(mdtext))
+	strLen := C.size_t(len(mdtext))
 	defer C.free(unsafe.Pointer(mdCstr))
 	htmlString := C.cmark_markdown_to_html(mdCstr, strLen, C.int(options))
 	defer C.free(unsafe.Pointer(htmlString))
diff --git a/config.h b/config.h
index 3de8c11..8eb09a1 100644
--- a/config.h
+++ b/config.h
@@ -21,3 +21,5 @@
 #ifndef HAVE_VA_COPY
   #define va_copy(dest, src) ((dest) = (src))
 #endif
+
+#define HAVE_C99_SNPRINTF
diff --git a/houdini.h b/houdini.h
index 9f00f6d..b926cf3 100644
--- a/houdini.h
+++ b/houdini.h
@@ -31,19 +31,12 @@ extern "C" {
 #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
 #define HOUDINI_UNESCAPED_SIZE(x) (x)
 
-extern size_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_js(cmark_strbuf *ob, const uint8_t *src, size_t size);
+extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure);
+extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
 
 #ifdef __cplusplus
 }
diff --git a/houdini_href_e.c b/houdini_href_e.c
index 7527780..7fb958a 100644
--- a/houdini_href_e.c
+++ b/houdini_href_e.c
@@ -49,10 +49,10 @@ static const char HREF_SAFE[] = {
 };
 
 int
-houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
 	static const uint8_t hex_chars[] = "0123456789ABCDEF";
-	size_t  i = 0, org;
+	bufsize_t i = 0, org;
 	uint8_t hex_str[3];
 
 	hex_str[0] = '%';
diff --git a/houdini_html_e.c b/houdini_html_e.c
index 1a4c3e1..7f4b91f 100644
--- a/houdini_html_e.c
+++ b/houdini_html_e.c
@@ -45,9 +45,9 @@ static const char *HTML_ESCAPES[] = {
 };
 
 int
-houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure)
+houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure)
 {
-	size_t  i = 0, org, esc = 0;
+	bufsize_t i = 0, org, esc = 0;
 
 	while (i < size) {
 		org = i;
@@ -75,7 +75,7 @@ houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secu
 }
 
 int
-houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
 	return houdini_escape_html0(ob, src, size, 1);
 }
diff --git a/houdini_html_u.c b/houdini_html_u.c
index 2cb14b4..e57894d 100644
--- a/houdini_html_u.c
+++ b/houdini_html_u.c
@@ -7,37 +7,50 @@
 #include "utf8.h"
 #include "html_unescape.h"
 
-size_t
-houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
+bufsize_t
+houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
-	size_t i = 0;
+	bufsize_t i = 0;
 
-	if (size > 3 && src[0] == '#') {
-		int codepoint = 0;
+	if (size >= 3 && src[0] == '#') {
+		int codepoint  = 0;
+		int num_digits = 0;
 
 		if (_isdigit(src[1])) {
 			for (i = 1; i < size && _isdigit(src[i]); ++i) {
-				int cp = (codepoint * 10) + (src[i] - '0');
+				codepoint = (codepoint * 10) + (src[i] - '0');
 
-				if (cp < codepoint)
-					return 0;
-
-				codepoint = cp;
+				if (codepoint >= 0x110000) {
+					// Keep counting digits but
+					// avoid integer overflow.
+					codepoint = 0x110000;
+				}
 			}
+
+			num_digits = i - 1;
 		}
 
 		else if (src[1] == 'x' || src[1] == 'X') {
 			for (i = 2; i < size && _isxdigit(src[i]); ++i) {
-				int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
+				codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
 
-				if (cp < codepoint)
-					return 0;
-
-				codepoint = cp;
+				if (codepoint >= 0x110000) {
+					// Keep counting digits but
+					// avoid integer overflow.
+					codepoint = 0x110000;
+				}
 			}
+
+			num_digits = i - 2;
 		}
 
-		if (i < size && src[i] == ';' && codepoint) {
+		if (num_digits >= 1 && num_digits <= 8 &&
+		    i < size && src[i] == ';') {
+			if (codepoint == 0 ||
+			    (codepoint >= 0xD800 && codepoint < 0xE000) ||
+			    codepoint >= 0x110000) {
+				codepoint = 0xFFFD;
+			}
 			utf8proc_encode_char(codepoint, ob);
 			return i + 1;
 		}
@@ -55,7 +68,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
 				const struct html_ent *entity = find_entity((char *)src, i);
 
 				if (entity != NULL) {
-					int len = 0;
+					bufsize_t len = 0;
 					while (len < 4 && entity->utf8[len] != '\0') {
 						++len;
 					}
@@ -72,9 +85,9 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
 }
 
 int
-houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
-	size_t  i = 0, org, ent;
+	bufsize_t i = 0, org, ent;
 
 	while (i < size) {
 		org = i;
@@ -109,7 +122,7 @@ houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
 	return 1;
 }
 
-void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size)
+void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
 	if (!houdini_unescape_html(ob, src, size))
 		cmark_strbuf_put(ob, src, size);
diff --git a/html.c b/html.c
index f1b88fa..a30bbca 100644
--- a/html.c
+++ b/html.c
@@ -11,20 +11,9 @@
 
 // Functions to convert cmark_nodes to HTML strings.
 
-static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length)
 {
-	if (length < 0)
-		length = strlen((char *)source);
-
-	houdini_escape_html0(dest, source, (size_t)length, 0);
-}
-
-static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length)
-{
-	if (length < 0)
-		length = strlen((char *)source);
-
-	houdini_escape_href(dest, source, (size_t)length);
+	houdini_escape_html0(dest, source, length, 0);
 }
 
 static inline void cr(cmark_strbuf *html)
@@ -165,7 +154,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 			S_render_sourcepos(node, html, options);
 			cmark_strbuf_puts(html, "><code>");
 		} else {
-			int first_tag = 0;
+			bufsize_t first_tag = 0;
 			while (first_tag < node->as.code.info.len &&
 			       node->as.code.info.data[first_tag] != ' ') {
 				first_tag += 1;
@@ -261,12 +250,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 	case CMARK_NODE_LINK:
 		if (entering) {
 			cmark_strbuf_puts(html, "<a href=\"");
-			if (node->as.link.url)
-				escape_href(html, node->as.link.url, -1);
+			houdini_escape_href(html, node->as.link.url.data,
+	                                    node->as.link.url.len);
 
-			if (node->as.link.title) {
+			if (node->as.link.title.len) {
 				cmark_strbuf_puts(html, "\" title=\"");
-				escape_html(html, node->as.link.title, -1);
+				escape_html(html, node->as.link.title.data,
+				            node->as.link.title.len);
 			}
 
 			cmark_strbuf_puts(html, "\">");
@@ -278,15 +268,16 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 	case CMARK_NODE_IMAGE:
 		if (entering) {
 			cmark_strbuf_puts(html, "<img src=\"");
-			if (node->as.link.url)
-				escape_href(html, node->as.link.url, -1);
+			houdini_escape_href(html, node->as.link.url.data,
+		                            node->as.link.url.len);
 
 			cmark_strbuf_puts(html, "\" alt=\"");
 			state->plain = node;
 		} else {
-			if (node->as.link.title) {
+			if (node->as.link.title.len) {
 				cmark_strbuf_puts(html, "\" title=\"");
-				escape_html(html, node->as.link.title, -1);
+				escape_html(html, node->as.link.title.data,
+				            node->as.link.title.len);
 			}
 
 			cmark_strbuf_puts(html, "\" />");
diff --git a/inlines.c b/inlines.c
index afe564c..7e8f806 100644
--- a/inlines.c
+++ b/inlines.c
@@ -36,7 +36,7 @@ typedef struct delimiter {
 	struct delimiter *previous;
 	struct delimiter *next;
 	cmark_node *inl_text;
-	int position;
+	bufsize_t position;
 	unsigned char delim_char;
 	bool can_open;
 	bool can_close;
@@ -45,7 +45,7 @@ typedef struct delimiter {
 
 typedef struct {
 	cmark_chunk input;
-	int pos;
+	bufsize_t pos;
 	cmark_reference_map *refmap;
 	delimiter *last_delim;
 } subject;
@@ -57,33 +57,35 @@ static int parse_inline(subject* subj, cmark_node * parent, int options);
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj, int options);
+static bufsize_t subject_find_special_char(subject *subj, int options);
 
-static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
+static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
 {
 	cmark_strbuf buf = GH_BUF_INIT;
 
 	cmark_chunk_trim(url);
 
-	if (url->len == 0)
-		return NULL;
+	if (url->len == 0) {
+		cmark_chunk result = CMARK_CHUNK_EMPTY;
+		return result;
+	}
 
 	if (is_email)
 		cmark_strbuf_puts(&buf, "mailto:");
 
 	houdini_unescape_html_f(&buf, url->data, url->len);
-	return cmark_strbuf_detach(&buf);
+	return cmark_chunk_buf_detach(&buf);
 }
 
-static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
+static inline cmark_node *make_link(cmark_node *label, cmark_chunk *url, cmark_chunk *title)
 {
 	cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
 	if(e != NULL) {
 		e->type = CMARK_NODE_LINK;
 		e->first_child   = label;
 		e->last_child    = label;
-		e->as.link.url   = url;
-		e->as.link.title = title;
+		e->as.link.url   = *url;
+		e->as.link.title = *title;
 		e->next = NULL;
 		label->parent = e;
 	}
@@ -92,7 +94,9 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig
 
 static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
 {
-	return make_link(label, cmark_clean_autolink(&url, is_email), NULL);
+	cmark_chunk clean_url = cmark_clean_autolink(&url, is_email);
+	cmark_chunk title = CMARK_CHUNK_EMPTY;
+	return make_link(label, &clean_url, &title);
 }
 
 // Create an inline with a literal string value.
@@ -134,19 +138,20 @@ static inline cmark_node* make_simple(cmark_node_type t)
 	return e;
 }
 
-static unsigned char *bufdup(const unsigned char *buf)
+// Duplicate a chunk by creating a copy of the buffer not by reusing the
+// buffer like cmark_chunk_dup does.
+static cmark_chunk chunk_clone(cmark_chunk *src)
 {
-	unsigned char *new_buf = NULL;
+	cmark_chunk c;
+	bufsize_t len = src->len;
 
-	if (buf) {
-		int len = strlen((char *)buf);
-		new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf));
-		if(new_buf != NULL) {
-			memcpy(new_buf, buf, len + 1);
-		}
-	}
+	c.len   = len;
+	c.data  = (unsigned char *)malloc(len + 1);
+	c.alloc = 1;
+	memcpy(c.data, src->data, len);
+	c.data[len] = '\0';
 
-	return new_buf;
+	return c;
 }
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
@@ -172,7 +177,7 @@ static inline unsigned char peek_char(subject *subj)
 	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
 }
 
-static inline unsigned char peek_at(subject *subj, int pos)
+static inline unsigned char peek_at(subject *subj, bufsize_t pos)
 {
 	return subj->input.data[pos];
 }
@@ -190,8 +195,8 @@ static inline int is_eof(subject* subj)
 static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 {
 	unsigned char c;
-	int startpos = subj->pos;
-	int len = 0;
+	bufsize_t startpos = subj->pos;
+	bufsize_t len = 0;
 
 	while ((c = peek_char(subj)) && (*f)(c)) {
 		advance(subj);
@@ -206,7 +211,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 // parsed).  Return 0 if you don't find matching closing
 // backticks, otherwise return the position in the subject
 // after the closing backticks.
-static int scan_to_closing_backticks(subject* subj, int openticklength)
+static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength)
 {
 	// read non backticks
 	unsigned char c;
@@ -216,7 +221,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 	if (is_eof(subj)) {
 		return 0;  // did not find closing ticks, return 0
 	}
-	int numticks = 0;
+	bufsize_t numticks = 0;
 	while (peek_char(subj) == '`') {
 		advance(subj);
 		numticks++;
@@ -232,8 +237,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 static cmark_node* handle_backticks(subject *subj)
 {
 	cmark_chunk openticks = take_while(subj, isbacktick);
-	int startpos = subj->pos;
-	int endpos = scan_to_closing_backticks(subj, openticks.len);
+	bufsize_t startpos = subj->pos;
+	bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
 
 	if (endpos == 0) { // not found
 		subj->pos = startpos; // rewind
@@ -255,7 +260,7 @@ static int
 scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 {
 	int numdelims = 0;
-	int before_char_pos;
+	bufsize_t before_char_pos;
 	int32_t after_char = 0;
 	int32_t before_char = 0;
 	int len;
@@ -302,9 +307,9 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 	                   !utf8proc_is_punctuation(after_char));
 	if (c == '_') {
 		*can_open = left_flanking &&
-			(!right_flanking || utf8proc_is_punctuation(before_char));
+		            (!right_flanking || utf8proc_is_punctuation(before_char));
 		*can_close = right_flanking &&
-			(!left_flanking || utf8proc_is_punctuation(after_char));
+		             (!left_flanking || utf8proc_is_punctuation(after_char));
 	} else if (c == '\'' || c == '"') {
 		*can_open = left_flanking && !right_flanking;
 		*can_close = right_flanking;
@@ -371,7 +376,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
 // Assumes the subject has a c at the current position.
 static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart)
 {
-	int numdelims;
+	bufsize_t numdelims;
 	cmark_node * inl_text;
 	bool can_open, can_close;
 	cmark_chunk contents;
@@ -495,11 +500,11 @@ static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
 {
 	delimiter *delim, *tmp_delim;
-	int use_delims;
+	bufsize_t use_delims;
 	cmark_node *opener_inl = opener->inl_text;
 	cmark_node *closer_inl = closer->inl_text;
-	int opener_num_chars = opener_inl->as.literal.len;
-	int closer_num_chars = closer_inl->as.literal.len;
+	bufsize_t opener_num_chars = opener_inl->as.literal.len;
+	bufsize_t closer_num_chars = closer_inl->as.literal.len;
 	cmark_node *tmp, *emph, *first_child, *last_child;
 
 	// calculate the actual number of characters used from this closer
@@ -578,7 +583,7 @@ static cmark_node* handle_backslash(subject *subj)
 	if (cmark_ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
 		advance(subj);
 		return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
-	} else if (nextchar == '\n') {
+	} else if (nextchar == '\r' || nextchar == '\n') {
 		advance(subj);
 		return make_linebreak();
 	} else {
@@ -591,7 +596,7 @@ static cmark_node* handle_backslash(subject *subj)
 static cmark_node* handle_entity(subject* subj)
 {
 	cmark_strbuf ent = GH_BUF_INIT;
-	size_t len;
+	bufsize_t len;
 
 	advance(subj);
 
@@ -613,7 +618,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
 {
 	cmark_strbuf unescaped = GH_BUF_INIT;
 
-	if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+	if (houdini_unescape_html(&unescaped, content->data, content->len)) {
 		return make_str(cmark_chunk_buf_detach(&unescaped));
 	} else {
 		return make_str(*content);
@@ -622,14 +627,16 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-unsigned char *cmark_clean_url(cmark_chunk *url)
+cmark_chunk cmark_clean_url(cmark_chunk *url)
 {
 	cmark_strbuf buf = GH_BUF_INIT;
 
 	cmark_chunk_trim(url);
 
-	if (url->len == 0)
-		return NULL;
+	if (url->len == 0) {
+		cmark_chunk result = CMARK_CHUNK_EMPTY;
+		return result;
+	}
 
 	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
 		houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
@@ -638,16 +645,18 @@ unsigned char *cmark_clean_url(cmark_chunk *url)
 	}
 
 	cmark_strbuf_unescape(&buf);
-	return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf);
+	return cmark_chunk_buf_detach(&buf);
 }
 
-unsigned char *cmark_clean_title(cmark_chunk *title)
+cmark_chunk cmark_clean_title(cmark_chunk *title)
 {
 	cmark_strbuf buf = GH_BUF_INIT;
 	unsigned char first, last;
 
-	if (title->len == 0)
-		return NULL;
+	if (title->len == 0) {
+		cmark_chunk result = CMARK_CHUNK_EMPTY;
+		return result;
+	}
 
 	first = title->data[0];
 	last = title->data[title->len - 1];
@@ -662,14 +671,14 @@ unsigned char *cmark_clean_title(cmark_chunk *title)
 	}
 
 	cmark_strbuf_unescape(&buf);
-	return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf);
+	return cmark_chunk_buf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
 static cmark_node* handle_pointy_brace(subject* subj)
 {
-	int matchlen = 0;
+	bufsize_t matchlen = 0;
 	cmark_chunk contents;
 
 	advance(subj);  // advance past first <
@@ -716,7 +725,7 @@ static cmark_node* handle_pointy_brace(subject* subj)
 // encountered.  Backticks in labels do not start code spans.
 static int link_label(subject* subj, cmark_chunk *raw_label)
 {
-	int startpos = subj->pos;
+	bufsize_t startpos = subj->pos;
 	int length = 0;
 	unsigned char c;
 
@@ -746,6 +755,7 @@ static int link_label(subject* subj, cmark_chunk *raw_label)
 
 	if (c == ']') { // match found
 		*raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+		cmark_chunk_trim(raw_label);
 		advance(subj);  // advance past ]
 		return 1;
 	}
@@ -759,14 +769,14 @@ static int link_label(subject* subj, cmark_chunk *raw_label)
 // Return a link, an image, or a literal close bracket.
 static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 {
-	int initial_pos;
-	int starturl, endurl, starttitle, endtitle, endall;
-	int n;
-	int sps;
+	bufsize_t initial_pos;
+	bufsize_t starturl, endurl, starttitle, endtitle, endall;
+	bufsize_t n;
+	bufsize_t sps;
 	cmark_reference *ref;
 	bool is_image = false;
 	cmark_chunk url_chunk, title_chunk;
-	unsigned char *url, *title;
+	cmark_chunk url, title;
 	delimiter *opener;
 	cmark_node *link_text;
 	cmark_node *inl;
@@ -854,8 +864,8 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 	cmark_chunk_free(&raw_label);
 
 	if (ref != NULL) { // found
-		url = bufdup(ref->url);
-		title = bufdup(ref->title);
+		url   = chunk_clone(&ref->url);
+		title = chunk_clone(&ref->title);
 		goto match;
 	} else {
 		goto noMatch;
@@ -912,7 +922,7 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 // Assumes the subject has a newline at the current position.
 static cmark_node* handle_newline(subject *subj)
 {
-	int nlpos = subj->pos;
+	bufsize_t nlpos = subj->pos;
 	// skip over newline
 	advance(subj);
 	// skip spaces at beginning of line
@@ -928,11 +938,11 @@ static cmark_node* handle_newline(subject *subj)
 	}
 }
 
-static int subject_find_special_char(subject *subj, int options)
+static bufsize_t subject_find_special_char(subject *subj, int options)
 {
-	// "\n\\`&_*[]<!"
+	// "\r\n\\`&_*[]<!"
 	static const int8_t SPECIAL_CHARS[256] = {
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 		0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@@ -970,7 +980,7 @@ static int subject_find_special_char(subject *subj, int options)
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	};
 
-	int n = subj->pos + 1;
+	bufsize_t n = subj->pos + 1;
 
 	while (n < subj->input.len) {
 		if (SPECIAL_CHARS[subj->input.data[n]])
@@ -991,12 +1001,13 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
 	cmark_node* new_inl = NULL;
 	cmark_chunk contents;
 	unsigned char c;
-	int endpos;
+	bufsize_t endpos;
 	c = peek_char(subj);
 	if (c == 0) {
 		return 0;
 	}
 	switch(c) {
+	case '\r':
 	case '\n':
 		new_inl = handle_newline(subj);
 		break;
@@ -1048,7 +1059,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
 		subj->pos = endpos;
 
 		// if we're at a newline, strip trailing spaces.
-		if (peek_char(subj) == '\n') {
+		if (peek_char(subj) == '\r' || peek_char(subj) == '\n') {
 			cmark_chunk_rtrim(&contents);
 		}
 
@@ -1078,7 +1089,7 @@ static void spnl(subject* subj)
 	bool seen_newline = false;
 	while (peek_char(subj) == ' ' ||
 	       (!seen_newline &&
-	        (seen_newline = peek_char(subj) == '\n'))) {
+	        (seen_newline = peek_char(subj) == '\r' || peek_char(subj) == '\n'))) {
 		advance(subj);
 	}
 }
@@ -1087,7 +1098,7 @@ static void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
 {
 	subject subj;
 
@@ -1095,13 +1106,13 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
 	cmark_chunk url;
 	cmark_chunk title;
 
-	int matchlen = 0;
-	int beforetitle;
+	bufsize_t matchlen = 0;
+	bufsize_t beforetitle;
 
 	subject_from_buf(&subj, input, NULL);
 
 	// parse label:
-	if (!link_label(&subj, &lab))
+	if (!link_label(&subj, &lab) || lab.len == 0)
 		return 0;
 
 	// colon:
@@ -1136,7 +1147,7 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
 	while (peek_char(&subj) == ' ') {
 		advance(&subj);
 	}
-	if (peek_char(&subj) == '\n') {
+	if (peek_char(&subj) == '\r' || peek_char(&subj) == '\n') {
 		advance(&subj);
 	} else if (peek_char(&subj) != 0) {
 		return 0;
diff --git a/inlines.h b/inlines.h
index 9e56790..f8847fc 100644
--- a/inlines.h
+++ b/inlines.h
@@ -5,12 +5,12 @@
 extern "C" {
 #endif
 
-unsigned char *cmark_clean_url(cmark_chunk *url);
-unsigned char *cmark_clean_title(cmark_chunk *title);
+cmark_chunk cmark_clean_url(cmark_chunk *url);
+cmark_chunk cmark_clean_title(cmark_chunk *title);
 
 void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options);
 
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
 
 #ifdef __cplusplus
 }
diff --git a/iterator.c b/iterator.c
index c6faf99..f18e3bf 100644
--- a/iterator.c
+++ b/iterator.c
@@ -129,18 +129,20 @@ void cmark_consolidate_text_nodes(cmark_node *root)
 		    cur->next &&
 		    cur->next->type == CMARK_NODE_TEXT) {
 			cmark_strbuf_clear(&buf);
-			cmark_strbuf_puts(&buf, cmark_node_get_literal(cur));
+			cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
 			tmp = cur->next;
 			while (tmp && tmp->type == CMARK_NODE_TEXT) {
 				cmark_iter_next(iter); // advance pointer
-				cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp));
+				cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
 				next = tmp->next;
 				cmark_node_free(tmp);
 				tmp = next;
 			}
-			cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf));
+			cmark_chunk_free(&cur->as.literal);
+			cur->as.literal = cmark_chunk_buf_detach(&buf);
 		}
 	}
 
+	cmark_strbuf_free(&buf);
 	cmark_iter_free(iter);
 }
diff --git a/node.c b/node.c
index 466b0a1..7b1bb10 100644
--- a/node.c
+++ b/node.c
@@ -122,12 +122,8 @@ void S_free_nodes(cmark_node *e)
 			break;
 		case NODE_LINK:
 		case NODE_IMAGE:
-			if (e->as.link.url) {
-				free(e->as.link.url);
-			}
-			if (e->as.link.title) {
-				free(e->as.link.title);
-			}
+			cmark_chunk_free(&e->as.link.url);
+			cmark_chunk_free(&e->as.link.title);
 			break;
 		default:
 			break;
@@ -282,15 +278,6 @@ cmark_node_set_user_data(cmark_node *node, void *user_data)
 	return 1;
 }
 
-static char*
-S_strdup(const char *str)
-{
-	size_t size = strlen(str) + 1;
-	char *dup = (char *)malloc(size);
-	memcpy(dup, str, size);
-	return dup;
-}
-
 const char*
 cmark_node_get_literal(cmark_node *node)
 {
@@ -541,7 +528,7 @@ cmark_node_get_url(cmark_node *node)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		return (char *)node->as.link.url;
+		return cmark_chunk_to_cstr(&node->as.link.url);
 	default:
 		break;
 	}
@@ -559,8 +546,7 @@ cmark_node_set_url(cmark_node *node, const char *url)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		free(node->as.link.url);
-		node->as.link.url = (unsigned char *)S_strdup(url);
+		cmark_chunk_set_cstr(&node->as.link.url, url);
 		return 1;
 	default:
 		break;
@@ -579,7 +565,7 @@ cmark_node_get_title(cmark_node *node)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		return (char *)node->as.link.title;
+		return cmark_chunk_to_cstr(&node->as.link.title);
 	default:
 		break;
 	}
@@ -597,8 +583,7 @@ cmark_node_set_title(cmark_node *node, const char *title)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		free(node->as.link.title);
-		node->as.link.title = (unsigned char *)S_strdup(title);
+		cmark_chunk_set_cstr(&node->as.link.title, title);
 		return 1;
 	default:
 		break;
diff --git a/node.h b/node.h
index 7a45d42..911a18f 100644
--- a/node.h
+++ b/node.h
@@ -38,8 +38,8 @@ typedef struct {
 } cmark_header;
 
 typedef struct {
-	unsigned char *url;
-	unsigned char *title;
+	cmark_chunk url;
+	cmark_chunk title;
 } cmark_link;
 
 struct cmark_node {
diff --git a/parser.h b/parser.h
index cbccae3..6e18c67 100644
--- a/parser.h
+++ b/parser.h
@@ -16,8 +16,12 @@ struct cmark_parser {
 	struct cmark_node* root;
 	struct cmark_node* current;
 	int line_number;
+	bufsize_t offset;
+	bufsize_t first_nonspace;
+	int indent;
+	bool blank;
 	cmark_strbuf *curline;
-	int last_line_length;
+	bufsize_t last_line_length;
 	cmark_strbuf *linebuf;
 	int options;
 };
diff --git a/references.c b/references.c
index 37bf4cb..1d3d56d 100644
--- a/references.c
+++ b/references.c
@@ -20,8 +20,8 @@ static void reference_free(cmark_reference *ref)
 {
 	if(ref != NULL) {
 		free(ref->label);
-		free(ref->url);
-		free(ref->title);
+		cmark_chunk_free(&ref->url);
+		cmark_chunk_free(&ref->title);
 		free(ref);
 	}
 }
diff --git a/references.h b/references.h
index 69325bb..a360cd5 100644
--- a/references.h
+++ b/references.h
@@ -12,8 +12,8 @@ extern "C" {
 struct cmark_reference {
 	struct cmark_reference *next;
 	unsigned char *label;
-	unsigned char *url;
-	unsigned char *title;
+	cmark_chunk url;
+	cmark_chunk title;
 	unsigned int hash;
 };
 
diff --git a/scanners.c b/scanners.c
index 42b9275..3f4ddac 100644
--- a/scanners.c
+++ b/scanners.c
@@ -1,11 +1,11 @@
-/* Generated by re2c 0.13.6 */
+/* Generated by re2c 0.13.5 */
 #include <stdlib.h>
 #include "chunk.h"
 #include "scanners.h"
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
 {
-	int res;
+	bufsize_t res;
 	unsigned char *ptr = (unsigned char *)c->data;
 	unsigned char lim = ptr[c->len];
 
@@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
 
 
 // Try to match a scheme including colon.
-int _scan_scheme(const unsigned char *p)
+bufsize_t _scan_scheme(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -578,7 +578,7 @@ int _scan_scheme(const unsigned char *p)
 	if (yych != ':') goto yy31;
 yy35:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy37:
 	yych = *++p;
 	if (yych == 'E') goto yy38;
@@ -2919,7 +2919,7 @@ int _scan_scheme(const unsigned char *p)
 }
 
 // Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+bufsize_t _scan_autolink_uri(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -3517,7 +3517,7 @@ int _scan_autolink_uri(const unsigned char *p)
 	}
 	if (yych <= '=') goto yy516;
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy524:
 	yych = *++p;
 	if (yych == 'E') goto yy525;
@@ -5858,7 +5858,7 @@ int _scan_autolink_uri(const unsigned char *p)
 }
 
 // Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+bufsize_t _scan_autolink_email(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -6060,7 +6060,7 @@ int _scan_autolink_email(const unsigned char *p)
 	}
 yy985:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy987:
 	++p;
 	yych = *p;
@@ -10803,7 +10803,7 @@ int _scan_autolink_email(const unsigned char *p)
 }
 
 // Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+bufsize_t _scan_html_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -10964,7 +10964,7 @@ int _scan_html_tag(const unsigned char *p)
 	if (yych != '>') goto yy1239;
 yy1243:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1245:
 	yych = *++p;
 	if (yych == 'C') goto yy1260;
@@ -11455,7 +11455,7 @@ int _scan_html_tag(const unsigned char *p)
 
 // Try to match an HTML block tag including first <,
 // returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+bufsize_t _scan_html_block_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -11513,7 +11513,7 @@ int _scan_html_block_tag(const unsigned char *p)
 	goto yy1301;
 yy1304:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1306:
 	yych = *++p;
 	if (yych <= '/') {
@@ -12022,7 +12022,7 @@ int _scan_html_block_tag(const unsigned char *p)
 	}
 yy1344:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1346:
 	yych = *++p;
 	if (yych <= 'R') {
@@ -12639,7 +12639,7 @@ int _scan_html_block_tag(const unsigned char *p)
 	}
 yy1467:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1469:
 	yych = *++p;
 	if (yych <= 'R') {
@@ -13243,7 +13243,7 @@ int _scan_html_block_tag(const unsigned char *p)
 // This may optionally be contained in <..>; otherwise
 // whitespace and unbalanced right parentheses aren't allowed.
 // Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+bufsize_t _scan_link_url(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -13253,7 +13253,7 @@ int _scan_link_url(const unsigned char *p)
 	unsigned int yyaccept = 0;
 	static const unsigned char yybm[] = {
 		  0,  64,  64,  64,  64,  64,  64,  64, 
-		 64,  64,   8,  64,  64,  64,  64,  64, 
+		 64,  64,   8,  64,  64,   8,  64,  64, 
 		 64,  64,  64,  64,  64,  64,  64,  64, 
 		 64,  64,  64,  64,  64,  64,  64,  64, 
 		 72, 112, 112, 112, 112, 112, 112, 112, 
@@ -13286,27 +13286,29 @@ int _scan_link_url(const unsigned char *p)
 		112, 112, 112, 112, 112, 112, 112, 112, 
 	};
 	yych = *p;
-	if (yych <= '(') {
-		if (yych <= 0x1F) {
+	if (yych <= '\'') {
+		if (yych <= '\f') {
 			if (yych == '\n') goto yy1589;
 			goto yy1597;
 		} else {
+			if (yych <= '\r') goto yy1591;
+			if (yych <= 0x1F) goto yy1597;
 			if (yych <= ' ') goto yy1591;
-			if (yych <= '\'') goto yy1593;
-			goto yy1596;
+			goto yy1593;
 		}
 	} else {
-		if (yych <= '<') {
+		if (yych <= ';') {
+			if (yych <= '(') goto yy1596;
 			if (yych <= ')') goto yy1597;
-			if (yych <= ';') goto yy1593;
-			goto yy1592;
+			goto yy1593;
 		} else {
+			if (yych <= '<') goto yy1592;
 			if (yych == '\\') goto yy1594;
 			goto yy1593;
 		}
 	}
 yy1588:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1589:
 	yyaccept = 0;
 	marker = ++p;
@@ -13339,13 +13341,18 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1605;
 	}
-	if (yych <= '\'') {
-		if (yych <= 0x00) goto yy1588;
-		if (yych == '\n') goto yy1588;
-		goto yy1612;
+	if (yych <= '\r') {
+		if (yych <= '\t') {
+			if (yych <= 0x00) goto yy1588;
+			goto yy1612;
+		} else {
+			if (yych <= '\n') goto yy1588;
+			if (yych <= '\f') goto yy1612;
+			goto yy1588;
+		}
 	} else {
 		if (yych <= ')') {
-			if (yych <= '(') goto yy1610;
+			if (yych == '(') goto yy1610;
 			goto yy1612;
 		} else {
 			if (yych <= '=') goto yy1602;
@@ -13395,7 +13402,7 @@ int _scan_link_url(const unsigned char *p)
 yy1600:
 	p = marker;
 	if (yyaccept <= 1) {
-		if (yyaccept == 0) {
+		if (yyaccept <= 0) {
 			goto yy1588;
 		} else {
 			goto yy1595;
@@ -13459,13 +13466,18 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1605;
 	}
-	if (yych <= '\'') {
-		if (yych <= 0x00) goto yy1588;
-		if (yych == '\n') goto yy1588;
-		goto yy1612;
+	if (yych <= '\r') {
+		if (yych <= '\t') {
+			if (yych <= 0x00) goto yy1588;
+			goto yy1612;
+		} else {
+			if (yych <= '\n') goto yy1588;
+			if (yych <= '\f') goto yy1612;
+			goto yy1588;
+		}
 	} else {
 		if (yych <= ')') {
-			if (yych <= '(') goto yy1610;
+			if (yych == '(') goto yy1610;
 			goto yy1612;
 		} else {
 			if (yych <= '=') goto yy1602;
@@ -13478,48 +13490,57 @@ int _scan_link_url(const unsigned char *p)
 	if (yych <= ' ') goto yy1608;
 	if (yych != ')') goto yy1603;
 yy1608:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1609:
 	++p;
 	yych = *p;
-	if (yych <= '>') {
-		if (yych <= ' ') {
+	if (yych <= '=') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1600;
 			if (yych == '\n') goto yy1600;
 			goto yy1612;
 		} else {
-			if (yych <= '/') goto yy1605;
-			if (yych <= '9') goto yy1612;
-			if (yych <= '=') goto yy1605;
-			goto yy1622;
+			if (yych <= ' ') {
+				if (yych <= '\r') goto yy1600;
+				goto yy1612;
+			} else {
+				if (yych <= '/') goto yy1605;
+				if (yych <= '9') goto yy1612;
+				goto yy1605;
+			}
 		}
 	} else {
-		if (yych <= '\\') {
+		if (yych <= '[') {
+			if (yych <= '>') goto yy1622;
 			if (yych <= '@') goto yy1605;
 			if (yych <= 'Z') goto yy1612;
-			if (yych <= '[') goto yy1605;
-			goto yy1623;
+			goto yy1605;
 		} else {
-			if (yych <= '`') goto yy1605;
-			if (yych <= 'z') goto yy1612;
-			if (yych <= '~') goto yy1605;
-			goto yy1612;
+			if (yych <= '`') {
+				if (yych <= '\\') goto yy1623;
+				goto yy1605;
+			} else {
+				if (yych <= 'z') goto yy1612;
+				if (yych <= '~') goto yy1605;
+				goto yy1612;
+			}
 		}
 	}
 yy1610:
 	++p;
 	yych = *p;
-	if (yych <= ')') {
-		if (yych <= '\n') {
+	if (yych <= '(') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1600;
-			if (yych >= '\n') goto yy1600;
+			if (yych == '\n') goto yy1600;
 		} else {
+			if (yych <= '\r') goto yy1600;
 			if (yych <= ' ') goto yy1612;
 			if (yych <= '\'') goto yy1610;
-			if (yych >= ')') goto yy1605;
 		}
 	} else {
 		if (yych <= '=') {
+			if (yych <= ')') goto yy1605;
 			if (yych == '<') goto yy1598;
 			goto yy1610;
 		} else {
@@ -13545,11 +13566,12 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1615;
 	}
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x00) goto yy1600;
-		if (yych <= '\t') goto yy1612;
-		goto yy1600;
+		if (yych == '\n') goto yy1600;
+		goto yy1612;
 	} else {
+		if (yych <= '\r') goto yy1600;
 		if (yych != '>') goto yy1612;
 	}
 	yyaccept = 2;
@@ -13570,46 +13592,56 @@ int _scan_link_url(const unsigned char *p)
 yy1619:
 	++p;
 	yych = *p;
-	if (yych <= '>') {
-		if (yych <= ' ') {
+	if (yych <= '=') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1600;
 			if (yych == '\n') goto yy1600;
 			goto yy1612;
 		} else {
-			if (yych <= '/') goto yy1610;
-			if (yych <= '9') goto yy1612;
-			if (yych <= '=') goto yy1610;
+			if (yych <= ' ') {
+				if (yych <= '\r') goto yy1600;
+				goto yy1612;
+			} else {
+				if (yych <= '/') goto yy1610;
+				if (yych <= '9') goto yy1612;
+				goto yy1610;
+			}
 		}
 	} else {
-		if (yych <= '\\') {
+		if (yych <= '[') {
+			if (yych <= '>') goto yy1620;
 			if (yych <= '@') goto yy1610;
 			if (yych <= 'Z') goto yy1612;
-			if (yych <= '[') goto yy1610;
-			goto yy1621;
+			goto yy1610;
 		} else {
-			if (yych <= '`') goto yy1610;
-			if (yych <= 'z') goto yy1612;
-			if (yych <= '~') goto yy1610;
-			goto yy1612;
+			if (yych <= '`') {
+				if (yych <= '\\') goto yy1621;
+				goto yy1610;
+			} else {
+				if (yych <= 'z') goto yy1612;
+				if (yych <= '~') goto yy1610;
+				goto yy1612;
+			}
 		}
 	}
 yy1620:
 	yyaccept = 2;
 	marker = ++p;
 	yych = *p;
-	if (yych <= ')') {
-		if (yych <= '\n') {
+	if (yych <= '(') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1608;
-			if (yych <= '\t') goto yy1612;
-			goto yy1608;
+			if (yych == '\n') goto yy1608;
+			goto yy1612;
 		} else {
+			if (yych <= '\r') goto yy1608;
 			if (yych <= ' ') goto yy1612;
 			if (yych <= '\'') goto yy1610;
-			if (yych <= '(') goto yy1612;
-			goto yy1605;
+			goto yy1612;
 		}
 	} else {
 		if (yych <= '=') {
+			if (yych <= ')') goto yy1605;
 			if (yych == '<') goto yy1598;
 			goto yy1610;
 		} else {
@@ -13621,22 +13653,23 @@ int _scan_link_url(const unsigned char *p)
 yy1621:
 	++p;
 	yych = *p;
-	if (yych <= '(') {
+	if (yych <= '\'') {
 		if (yych <= '\n') {
 			if (yych <= 0x00) goto yy1600;
 			if (yych <= '\t') goto yy1612;
 			goto yy1600;
 		} else {
+			if (yych == '\r') goto yy1600;
 			if (yych <= ' ') goto yy1612;
-			if (yych <= '\'') goto yy1610;
-			goto yy1612;
+			goto yy1610;
 		}
 	} else {
-		if (yych <= '>') {
+		if (yych <= '=') {
+			if (yych <= '(') goto yy1612;
 			if (yych <= ')') goto yy1605;
-			if (yych <= '=') goto yy1610;
-			goto yy1620;
+			goto yy1610;
 		} else {
+			if (yych <= '>') goto yy1620;
 			if (yych == '\\') goto yy1619;
 			goto yy1610;
 		}
@@ -13648,13 +13681,18 @@ int _scan_link_url(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1605;
 	}
-	if (yych <= '\'') {
-		if (yych <= 0x00) goto yy1608;
-		if (yych == '\n') goto yy1608;
-		goto yy1612;
+	if (yych <= '\r') {
+		if (yych <= '\t') {
+			if (yych <= 0x00) goto yy1608;
+			goto yy1612;
+		} else {
+			if (yych <= '\n') goto yy1608;
+			if (yych <= '\f') goto yy1612;
+			goto yy1608;
+		}
 	} else {
 		if (yych <= ')') {
-			if (yych <= '(') goto yy1610;
+			if (yych == '(') goto yy1610;
 			goto yy1612;
 		} else {
 			if (yych <= '=') goto yy1602;
@@ -13666,22 +13704,23 @@ int _scan_link_url(const unsigned char *p)
 	yyaccept = 0;
 	marker = ++p;
 	yych = *p;
-	if (yych <= '(') {
+	if (yych <= '\'') {
 		if (yych <= '\n') {
 			if (yych <= 0x00) goto yy1588;
 			if (yych <= '\t') goto yy1612;
 			goto yy1588;
 		} else {
+			if (yych == '\r') goto yy1588;
 			if (yych <= ' ') goto yy1612;
-			if (yych <= '\'') goto yy1605;
-			goto yy1610;
+			goto yy1605;
 		}
 	} else {
-		if (yych <= '>') {
+		if (yych <= '=') {
+			if (yych <= '(') goto yy1610;
 			if (yych <= ')') goto yy1612;
-			if (yych <= '=') goto yy1605;
-			goto yy1622;
+			goto yy1605;
 		} else {
+			if (yych <= '>') goto yy1622;
 			if (yych == '\\') goto yy1609;
 			goto yy1605;
 		}
@@ -13693,7 +13732,7 @@ int _scan_link_url(const unsigned char *p)
 // Try to match a link title (in single quotes, in double quotes, or
 // in parentheses), returning number of chars matched.  Allow one
 // level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+bufsize_t _scan_link_title(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -13779,13 +13818,13 @@ int _scan_link_title(const unsigned char *p)
 yy1633:
 	p = marker;
 	if (yyaccept <= 1) {
-		if (yyaccept == 0) {
+		if (yyaccept <= 0) {
 			goto yy1626;
 		} else {
 			goto yy1637;
 		}
 	} else {
-		if (yyaccept == 2) {
+		if (yyaccept <= 2) {
 			goto yy1644;
 		} else {
 			goto yy1651;
@@ -13803,7 +13842,7 @@ int _scan_link_title(const unsigned char *p)
 yy1636:
 	++p;
 yy1637:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1638:
 	yyaccept = 1;
 	marker = ++p;
@@ -13835,7 +13874,7 @@ int _scan_link_title(const unsigned char *p)
 yy1643:
 	++p;
 yy1644:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1645:
 	yyaccept = 2;
 	marker = ++p;
@@ -13867,7 +13906,7 @@ int _scan_link_title(const unsigned char *p)
 yy1650:
 	++p;
 yy1651:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1652:
 	yyaccept = 3;
 	marker = ++p;
@@ -13883,7 +13922,7 @@ int _scan_link_title(const unsigned char *p)
 }
 
 // Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+bufsize_t _scan_spacechars(const unsigned char *p)
 {
   const unsigned char *start = p; \
 
@@ -13934,7 +13973,7 @@ int _scan_spacechars(const unsigned char *p)
 		goto yy1659;
 	}
 yy1655:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1656:
 	yych = *++p;
 	goto yy1658;
@@ -13954,7 +13993,7 @@ int _scan_spacechars(const unsigned char *p)
 }
 
 // Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+bufsize_t _scan_atx_header_start(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14006,16 +14045,21 @@ int _scan_atx_header_start(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych == '#') goto yy1670;
-	goto yy1663;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1663;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych == '#') goto yy1670;
+		goto yy1663;
+	}
 yy1665:
 	yych = *++p;
 	goto yy1663;
 yy1666:
 	++p;
 yy1667:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1668:
 	++p;
 	yych = *p;
@@ -14028,8 +14072,12 @@ int _scan_atx_header_start(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych == '#') goto yy1672;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych == '#') goto yy1672;
+	}
 yy1671:
 	p = marker;
 	goto yy1663;
@@ -14038,33 +14086,49 @@ int _scan_atx_header_start(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych != '#') goto yy1671;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1671;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych != '#') goto yy1671;
+	}
 	yych = *++p;
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych != '#') goto yy1671;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1671;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych != '#') goto yy1671;
+	}
 	yych = *++p;
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych != '#') goto yy1671;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1671;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych != '#') goto yy1671;
+	}
 	++p;
 	if (yybm[0+(yych = *p)] & 128) {
 		goto yy1668;
 	}
 	if (yych == '\n') goto yy1666;
+	if (yych == '\r') goto yy1666;
 	goto yy1671;
 }
 
 }
 
-// Match sexext header line.  Return 1 for level-1 header,
+// Match setext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+bufsize_t _scan_setext_header_line(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
 
@@ -14119,17 +14183,27 @@ int _scan_setext_header_line(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1693;
 	}
-	if (yych == '\n') goto yy1691;
-	if (yych == ' ') goto yy1689;
-	goto yy1678;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1691;
+		goto yy1678;
+	} else {
+		if (yych <= '\r') goto yy1691;
+		if (yych == ' ') goto yy1689;
+		goto yy1678;
+	}
 yy1680:
 	yych = *(marker = ++p);
 	if (yybm[0+yych] & 32) {
 		goto yy1682;
 	}
-	if (yych == '\n') goto yy1685;
-	if (yych == '-') goto yy1687;
-	goto yy1678;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1685;
+		goto yy1678;
+	} else {
+		if (yych <= '\r') goto yy1685;
+		if (yych == '-') goto yy1687;
+		goto yy1678;
+	}
 yy1681:
 	yych = *++p;
 	goto yy1678;
@@ -14140,6 +14214,7 @@ int _scan_setext_header_line(const unsigned char *p)
 		goto yy1682;
 	}
 	if (yych == '\n') goto yy1685;
+	if (yych == '\r') goto yy1685;
 yy1684:
 	p = marker;
 	goto yy1678;
@@ -14152,15 +14227,24 @@ int _scan_setext_header_line(const unsigned char *p)
 	if (yybm[0+yych] & 32) {
 		goto yy1682;
 	}
-	if (yych == '\n') goto yy1685;
-	if (yych == '-') goto yy1687;
-	goto yy1684;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1685;
+		goto yy1684;
+	} else {
+		if (yych <= '\r') goto yy1685;
+		if (yych == '-') goto yy1687;
+		goto yy1684;
+	}
 yy1689:
 	++p;
 	yych = *p;
-	if (yych == '\n') goto yy1691;
-	if (yych == ' ') goto yy1689;
-	goto yy1684;
+	if (yych <= '\f') {
+		if (yych != '\n') goto yy1684;
+	} else {
+		if (yych <= '\r') goto yy1691;
+		if (yych == ' ') goto yy1689;
+		goto yy1684;
+	}
 yy1691:
 	++p;
 	{ return 1; }
@@ -14170,9 +14254,14 @@ int _scan_setext_header_line(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1693;
 	}
-	if (yych == '\n') goto yy1691;
-	if (yych == ' ') goto yy1689;
-	goto yy1684;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1691;
+		goto yy1684;
+	} else {
+		if (yych <= '\r') goto yy1691;
+		if (yych == ' ') goto yy1689;
+		goto yy1684;
+	}
 }
 
 }
@@ -14180,7 +14269,7 @@ int _scan_setext_header_line(const unsigned char *p)
 // Scan a horizontal rule line: "...three or more hyphens, asterisks,
 // or underscores on a line by themselves. If you wish, you may use
 // spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+bufsize_t _scan_hrule(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14278,20 +14367,24 @@ int _scan_hrule(const unsigned char *p)
 	if (yybm[0+yych] & 16) {
 		goto yy1707;
 	}
-	if (yych <= 0x08) goto yy1704;
-	if (yych <= '\t') goto yy1709;
-	if (yych <= '\n') goto yy1711;
-	goto yy1704;
+	if (yych <= '\n') {
+		if (yych <= 0x08) goto yy1704;
+		if (yych >= '\n') goto yy1711;
+	} else {
+		if (yych == '\r') goto yy1711;
+		goto yy1704;
+	}
 yy1709:
 	++p;
 	yych = *p;
 	if (yybm[0+yych] & 32) {
 		goto yy1709;
 	}
-	if (yych != '\n') goto yy1704;
+	if (yych == '\n') goto yy1711;
+	if (yych != '\r') goto yy1704;
 yy1711:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1713:
 	++p;
 	yych = *p;
@@ -14308,23 +14401,28 @@ int _scan_hrule(const unsigned char *p)
 	if (yybm[0+yych] & 64) {
 		goto yy1717;
 	}
-	if (yych <= 0x08) goto yy1704;
-	if (yych <= '\t') goto yy1719;
-	if (yych <= '\n') goto yy1721;
-	goto yy1704;
+	if (yych <= '\n') {
+		if (yych <= 0x08) goto yy1704;
+		if (yych >= '\n') goto yy1721;
+	} else {
+		if (yych == '\r') goto yy1721;
+		goto yy1704;
+	}
 yy1719:
 	++p;
 	yych = *p;
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1704;
 		if (yych <= '\t') goto yy1719;
+		if (yych >= '\v') goto yy1704;
 	} else {
+		if (yych <= '\r') goto yy1721;
 		if (yych == ' ') goto yy1719;
 		goto yy1704;
 	}
 yy1721:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1723:
 	++p;
 	yych = *p;
@@ -14341,29 +14439,34 @@ int _scan_hrule(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1727;
 	}
-	if (yych <= 0x08) goto yy1704;
-	if (yych <= '\t') goto yy1729;
-	if (yych <= '\n') goto yy1731;
-	goto yy1704;
+	if (yych <= '\n') {
+		if (yych <= 0x08) goto yy1704;
+		if (yych >= '\n') goto yy1731;
+	} else {
+		if (yych == '\r') goto yy1731;
+		goto yy1704;
+	}
 yy1729:
 	++p;
 	yych = *p;
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1704;
 		if (yych <= '\t') goto yy1729;
+		if (yych >= '\v') goto yy1704;
 	} else {
+		if (yych <= '\r') goto yy1731;
 		if (yych == ' ') goto yy1729;
 		goto yy1704;
 	}
 yy1731:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 }
 
 }
 
 // Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+bufsize_t _scan_open_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14372,7 +14475,7 @@ int _scan_open_code_fence(const unsigned char *p)
 	unsigned char yych;
 	static const unsigned char yybm[] = {
 		  0, 160, 160, 160, 160, 160, 160, 160, 
-		160, 160,   0, 160, 160, 160, 160, 160, 
+		160, 160,   0, 160, 160,   0, 160, 160, 
 		160, 160, 160, 160, 160, 160, 160, 160, 
 		160, 160, 160, 160, 160, 160, 160, 160, 
 		160, 160, 160, 160, 160, 160, 160, 160, 
@@ -14454,7 +14557,7 @@ int _scan_open_code_fence(const unsigned char *p)
 yy1745:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1747:
 	yych = *++p;
 	if (yybm[0+yych] & 64) {
@@ -14482,13 +14585,13 @@ int _scan_open_code_fence(const unsigned char *p)
 yy1752:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 }
 
 }
 
 // Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+bufsize_t _scan_close_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14565,20 +14668,26 @@ int _scan_close_code_fence(const unsigned char *p)
 	if (yybm[0+yych] & 64) {
 		goto yy1764;
 	}
-	if (yych == '\n') goto yy1766;
-	if (yych == '~') goto yy1762;
-	goto yy1761;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1766;
+		goto yy1761;
+	} else {
+		if (yych <= '\r') goto yy1766;
+		if (yych == '~') goto yy1762;
+		goto yy1761;
+	}
 yy1764:
 	++p;
 	yych = *p;
 	if (yybm[0+yych] & 64) {
 		goto yy1764;
 	}
-	if (yych != '\n') goto yy1761;
+	if (yych == '\n') goto yy1766;
+	if (yych != '\r') goto yy1761;
 yy1766:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1768:
 	yych = *++p;
 	if (yybm[0+yych] & 128) {
@@ -14592,33 +14701,38 @@ int _scan_close_code_fence(const unsigned char *p)
 	if (yybm[0+yych] & 128) {
 		goto yy1769;
 	}
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1761;
-		if (yych >= '\n') goto yy1773;
+		if (yych <= '\t') goto yy1771;
+		if (yych <= '\n') goto yy1773;
+		goto yy1761;
 	} else {
+		if (yych <= '\r') goto yy1773;
 		if (yych != ' ') goto yy1761;
 	}
 yy1771:
 	++p;
 	yych = *p;
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1761;
 		if (yych <= '\t') goto yy1771;
+		if (yych >= '\v') goto yy1761;
 	} else {
+		if (yych <= '\r') goto yy1773;
 		if (yych == ' ') goto yy1771;
 		goto yy1761;
 	}
 yy1773:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 }
 
 }
 
 // Scans an entity.
 // Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+bufsize_t _scan_entity(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14685,7 +14799,7 @@ int _scan_entity(const unsigned char *p)
 	}
 yy1784:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1786:
 	yych = *++p;
 	if (yych <= ';') {
diff --git a/scanners.h b/scanners.h
index 1353f3b..bc5134e 100644
--- a/scanners.h
+++ b/scanners.h
@@ -5,21 +5,21 @@
 extern "C" {
 #endif
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset);
-int _scan_scheme(const unsigned char *p);
-int _scan_autolink_uri(const unsigned char *p);
-int _scan_autolink_email(const unsigned char *p);
-int _scan_html_tag(const unsigned char *p);
-int _scan_html_block_tag(const unsigned char *p);
-int _scan_link_url(const unsigned char *p);
-int _scan_link_title(const unsigned char *p);
-int _scan_spacechars(const unsigned char *p);
-int _scan_atx_header_start(const unsigned char *p);
-int _scan_setext_header_line(const unsigned char *p);
-int _scan_hrule(const unsigned char *p);
-int _scan_open_code_fence(const unsigned char *p);
-int _scan_close_code_fence(const unsigned char *p);
-int _scan_entity(const unsigned char *p);
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset);
+bufsize_t _scan_scheme(const unsigned char *p);
+bufsize_t _scan_autolink_uri(const unsigned char *p);
+bufsize_t _scan_autolink_email(const unsigned char *p);
+bufsize_t _scan_html_tag(const unsigned char *p);
+bufsize_t _scan_html_block_tag(const unsigned char *p);
+bufsize_t _scan_link_url(const unsigned char *p);
+bufsize_t _scan_link_title(const unsigned char *p);
+bufsize_t _scan_spacechars(const unsigned char *p);
+bufsize_t _scan_atx_header_start(const unsigned char *p);
+bufsize_t _scan_setext_header_line(const unsigned char *p);
+bufsize_t _scan_hrule(const unsigned char *p);
+bufsize_t _scan_open_code_fence(const unsigned char *p);
+bufsize_t _scan_close_code_fence(const unsigned char *p);
+bufsize_t _scan_entity(const unsigned char *p);
 
 #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
 #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
diff --git a/scanners.re b/scanners.re
index 31cdb4f..3722a99 100644
--- a/scanners.re
+++ b/scanners.re
@@ -2,9 +2,9 @@
 #include "chunk.h"
 #include "scanners.h"
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
 {
-	int res;
+	bufsize_t res;
 	unsigned char *ptr = (unsigned char *)c->data;
 	unsigned char lim = ptr[c->len];
 
@@ -70,29 +70,29 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
 */
 
 // Try to match a scheme including colon.
-int _scan_scheme(const unsigned char *p)
+bufsize_t _scan_scheme(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  scheme [:] { return (p - start); }
+  scheme [:] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+bufsize_t _scan_autolink_uri(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  scheme [:][^\x00-\x20<>]*[>]  { return (p - start); }
+  scheme [:][^\x00-\x20<>]*[>]  { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+bufsize_t _scan_autolink_email(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -101,32 +101,32 @@ int _scan_autolink_email(const unsigned char *p)
     [@]
     [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
     ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
-    [>] { return (p - start); }
+    [>] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+bufsize_t _scan_html_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  htmltag { return (p - start); }
+  htmltag { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match an HTML block tag including first <,
 // returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+bufsize_t _scan_html_block_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [<] [/] blocktagname (spacechar | [>])  { return (p - start); }
-  [<] blocktagname (spacechar | [/>]) { return (p - start); }
-  [<] [!?] { return (p - start); }
+  [<] [/] blocktagname (spacechar | [>])  { return (bufsize_t)(p - start); }
+  [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); }
+  [<] [!?] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
@@ -135,13 +135,13 @@ int _scan_html_block_tag(const unsigned char *p)
 // This may optionally be contained in <..>; otherwise
 // whitespace and unbalanced right parentheses aren't allowed.
 // Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+bufsize_t _scan_link_url(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
-  [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
+  [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
+  [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
@@ -149,47 +149,47 @@ int _scan_link_url(const unsigned char *p)
 // Try to match a link title (in single quotes, in double quotes, or
 // in parentheses), returning number of chars matched.  Allow one
 // level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+bufsize_t _scan_link_title(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  ["] (escaped_char|[^"\x00])* ["]   { return (p - start); }
-  ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
-  [(] (escaped_char|[^)\x00])* [)]  { return (p - start); }
+  ["] (escaped_char|[^"\x00])* ["]   { return (bufsize_t)(p - start); }
+  ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); }
+  [(] (escaped_char|[^)\x00])* [)]  { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+bufsize_t _scan_spacechars(const unsigned char *p)
 {
   const unsigned char *start = p; \
 /*!re2c
-  [ \t\v\f\r\n]* { return (p - start); }
+  [ \t\v\f\r\n]* { return (bufsize_t)(p - start); }
   . { return 0; }
 */
 }
 
 // Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+bufsize_t _scan_atx_header_start(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [#]{1,6} ([ ]+|[\n])  { return (p - start); }
+  [#]{1,6} ([ ]+|[\r\n])  { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
-// Match sexext header line.  Return 1 for level-1 header,
+// Match setext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+bufsize_t _scan_setext_header_line(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
 /*!re2c
-  [=]+ [ ]* [\n] { return 1; }
-  [-]+ [ ]* [\n] { return 2; }
+  [=]+ [ ]* [\r\n] { return 1; }
+  [-]+ [ ]* [\r\n] { return 2; }
   .? { return 0; }
 */
 }
@@ -197,51 +197,51 @@ int _scan_setext_header_line(const unsigned char *p)
 // Scan a horizontal rule line: "...three or more hyphens, asterisks,
 // or underscores on a line by themselves. If you wish, you may use
 // spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+bufsize_t _scan_hrule(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  ([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
-  ([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
-  ([-][ ]*){3,} [ \t]* [\n] { return (p - start); }
+  ([*][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
+  ([_][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
+  ([-][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+bufsize_t _scan_open_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
-  [~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
+  [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
+  [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
   .?                        { return 0; }
 */
 }
 
 // Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+bufsize_t _scan_close_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [`]{3,} / [ \t]*[\n] { return (p - start); }
-  [~]{3,} / [ \t]*[\n] { return (p - start); }
+  [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
+  [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Scans an entity.
 // Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+bufsize_t _scan_entity(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
   [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
-     { return (p - start); }
+     { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
diff --git a/utf8.c b/utf8.c
index b83c2a5..ba1d873 100644
--- a/utf8.c
+++ b/utf8.c
@@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf)
 	cmark_strbuf_put(buf, repl, 3);
 }
 
-static int utf8proc_charlen(const uint8_t *str, int str_len)
+static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
 {
 	int length, i;
 
@@ -42,7 +42,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len)
 	if (!length)
 		return -1;
 
-	if (str_len >= 0 && length > str_len)
+	if (str_len >= 0 && (bufsize_t)length > str_len)
 		return -str_len;
 
 	for (i = 1; i < length; i++) {
@@ -54,7 +54,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len)
 }
 
 // Validate a single UTF-8 character according to RFC 3629.
-static int utf8proc_valid(const uint8_t *str, int str_len)
+static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
 {
 	int length = utf8proc_charlen(str, str_len);
 
@@ -109,14 +109,14 @@ static int utf8proc_valid(const uint8_t *str, int str_len)
 	return length;
 }
 
-void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
+void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size)
 {
 	static const uint8_t whitespace[] = "    ";
 
-	size_t i = 0, tab = 0;
+	bufsize_t i = 0, tab = 0;
 
 	while (i < size) {
-		size_t org = i;
+		bufsize_t org = i;
 
 		while (i < size && line[i] != '\t' && line[i] != '\0'
 		       && line[i] < 0x80) {
@@ -151,7 +151,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
 	}
 }
 
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
+int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst)
 {
 	int length;
 	int32_t uc = -1;
@@ -191,7 +191,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
 void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
 {
 	uint8_t dst[4];
-	int len = 0;
+	bufsize_t len = 0;
 
 	assert(uc >= 0);
 
@@ -227,7 +227,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
 	cmark_strbuf_put(buf, dst, len);
 }
 
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len)
 {
 	int32_t c;
 
@@ -235,7 +235,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
 	utf8proc_encode_char(x, dest)
 
 	while (len > 0) {
-		int char_len = utf8proc_iterate(str, len, &c);
+		bufsize_t char_len = utf8proc_iterate(str, len, &c);
 
 		if (char_len >= 0) {
 #include "case_fold_switch.inc"
diff --git a/utf8.h b/utf8.h
index 7df1573..ed1d7ee 100644
--- a/utf8.h
+++ b/utf8.h
@@ -8,10 +8,10 @@
 extern "C" {
 #endif
 
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len);
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len);
 void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst);
-void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size);
+int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
+void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size);
 int utf8proc_is_space(int32_t uc);
 int utf8proc_is_punctuation(int32_t uc);
 
diff --git a/xml.c b/xml.c
index 845e553..7eec5a6 100644
--- a/xml.c
+++ b/xml.c
@@ -11,14 +11,9 @@
 
 // Functions to convert cmark_nodes to XML strings.
 
-static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length)
 {
-	if (source != NULL) {
-		if (length < 0)
-			length = strlen((char *)source);
-
-		houdini_escape_html0(dest, source, (size_t)length, 0);
-	}
+	houdini_escape_html0(dest, source, length, 0);
 }
 
 struct render_state {
@@ -118,10 +113,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 		case CMARK_NODE_LINK:
 		case CMARK_NODE_IMAGE:
 			cmark_strbuf_puts(xml, " destination=\"");
-			escape_xml(xml, node->as.link.url, -1);
+			escape_xml(xml, node->as.link.url.data,
+			           node->as.link.url.len);
 			cmark_strbuf_putc(xml, '"');
 			cmark_strbuf_puts(xml, " title=\"");
-			escape_xml(xml, node->as.link.title, -1);
+			escape_xml(xml, node->as.link.title.data,
+			           node->as.link.title.len);
 			cmark_strbuf_putc(xml, '"');
 			break;
 		default: