From 9acde660827d0ae4953ed89ea4bd677f8cf55119 Mon Sep 17 00:00:00 2001 From: James Adam Date: Mon, 8 Jun 2015 18:24:25 -0400 Subject: [PATCH] Updated to cmark 0.20.0 --- blocks.c | 318 ++++++++++++++++++--------------- buffer.c | 217 +++++++++++------------ buffer.h | 94 ++++------ chunk.h | 33 ++-- cmark.c | 2 +- cmark.h | 2 +- commonmark.c | 52 +++--- commonmark.go | 2 +- config.h | 2 + houdini.h | 19 +- houdini_href_e.c | 4 +- houdini_html_e.c | 6 +- houdini_html_u.c | 53 +++--- html.c | 35 ++-- inlines.c | 147 ++++++++-------- inlines.h | 6 +- iterator.c | 8 +- node.c | 27 +-- node.h | 4 +- parser.h | 6 +- references.c | 4 +- references.h | 4 +- scanners.c | 444 +++++++++++++++++++++++++++++------------------ scanners.h | 30 ++-- scanners.re | 82 ++++----- utf8.c | 20 +-- utf8.h | 6 +- xml.c | 15 +- 28 files changed, 899 insertions(+), 743 deletions(-) diff --git a/blocks.c b/blocks.c index a15f819..a3ac712 100644 --- a/blocks.c +++ b/blocks.c @@ -18,13 +18,19 @@ #define CODE_INDENT 4 #define peek_at(i, n) (i)->data[n] +static inline bool +S_is_line_end_char(char c) +{ + return (c == '\n' || c == '\r'); +} + static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof); static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - size_t bytes); + bufsize_t bytes); static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column) { @@ -63,6 +69,10 @@ cmark_parser *cmark_parser_new(int options) parser->root = document; parser->current = document; parser->line_number = 0; + parser->offset = 0; + parser->first_nonspace = 0; + parser->indent = 0; + parser->blank = false; parser->curline = line; parser->last_line_length = 0; parser->linebuf = buf; @@ -85,10 +95,11 @@ static cmark_node* finalize(cmark_parser *parser, cmark_node* b); // Returns true if line has only space characters, else false. -static bool is_blank(cmark_strbuf *s, int offset) +static bool is_blank(cmark_strbuf *s, bufsize_t offset) { while (offset < s->size) { switch (s->ptr[offset]) { + case '\r': case '\n': return true; case ' ': @@ -117,7 +128,7 @@ static inline bool accepts_lines(cmark_node_type block_type) block_type == NODE_CODE_BLOCK); } -static void add_line(cmark_node* node, cmark_chunk *ch, int offset) +static void add_line(cmark_node* node, cmark_chunk *ch, bufsize_t offset) { assert(node->open); cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset); @@ -125,12 +136,13 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset) static void remove_trailing_blank_lines(cmark_strbuf *ln) { - int i; + bufsize_t i; + unsigned char c; for (i = ln->size - 1; i >= 0; --i) { - unsigned char c = ln->ptr[i]; + c = ln->ptr[i]; - if (c != ' ' && c != '\t' && c != '\r' && c != '\n') + if (c != ' ' && c != '\t' && !S_is_line_end_char(c)) break; } @@ -139,9 +151,16 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) return; } - i = cmark_strbuf_strchr(ln, '\n', i); - if (i >= 0) + + for(; i < ln->size; ++i) { + c = ln->ptr[i]; + + if (!S_is_line_end_char(c)) + continue; + cmark_strbuf_truncate(ln, i); + break; + } } // Check to see if a node ends with a blank line, descending @@ -185,8 +204,7 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr) static cmark_node* finalize(cmark_parser *parser, cmark_node* b) { - int firstlinelen; - int pos; + bufsize_t pos; cmark_node* item; cmark_node* subitem; cmark_node* parent; @@ -204,9 +222,11 @@ finalize(cmark_parser *parser, cmark_node* b) (b->type == NODE_CODE_BLOCK && b->as.code.fenced) || (b->type == NODE_HEADER && b->as.header.setext)) { b->end_line = parser->line_number; - b->end_column = parser->curline->size - - (parser->curline->ptr[parser->curline->size - 1] == '\n' ? - 1 : 0); + b->end_column = parser->curline->size; + if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\n') + b->end_column -= 1; + if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\r') + b->end_column -= 1; } else { b->end_line = parser->line_number - 1; b->end_column = parser->last_line_length; @@ -232,19 +252,27 @@ finalize(cmark_parser *parser, cmark_node* b) } else { // first line of contents becomes info - firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0); + for (pos = 0; pos < b->string_content.size; ++pos) { + if (S_is_line_end_char(b->string_content.ptr[pos])) + break; + } + assert(pos < b->string_content.size); cmark_strbuf tmp = GH_BUF_INIT; houdini_unescape_html_f( &tmp, b->string_content.ptr, - firstlinelen + pos ); cmark_strbuf_trim(&tmp); cmark_strbuf_unescape(&tmp); b->as.code.info = cmark_chunk_buf_detach(&tmp); - cmark_strbuf_drop(&b->string_content, firstlinelen + 1); + if (b->string_content.ptr[pos] == '\r') + pos += 1; + if (b->string_content.ptr[pos] == '\n') + pos += 1; + cmark_strbuf_drop(&b->string_content, pos); } b->as.code.literal = cmark_chunk_buf_detach(&b->string_content); break; @@ -339,10 +367,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int o // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. -static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr) +static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr) { unsigned char c; - int startpos; + bufsize_t startpos; cmark_list *data; startpos = pos; @@ -467,27 +495,39 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, const unsigned char *end = buffer + len; while (buffer < end) { - const unsigned char *eol - = (const unsigned char *)memchr(buffer, '\n', - end - buffer); + const unsigned char *eol; size_t line_len; + bufsize_t bufsize; + + for (eol = buffer; eol < end; ++eol) { + if (S_is_line_end_char(*eol)) + break; + } + if (eol >= end) + eol = NULL; if (eol) { - line_len = eol + 1 - buffer; + if (eol < end && *eol == '\r') + eol++; + if (eol < end && *eol == '\n') + eol++; + line_len = eol - buffer; } else if (eof) { line_len = end - buffer; } else { - cmark_strbuf_put(parser->linebuf, buffer, end - buffer); + bufsize = cmark_strbuf_check_bufsize(end - buffer); + cmark_strbuf_put(parser->linebuf, buffer, bufsize); break; } + bufsize = cmark_strbuf_check_bufsize(line_len); if (parser->linebuf->size > 0) { - cmark_strbuf_put(parser->linebuf, buffer, line_len); + cmark_strbuf_put(parser->linebuf, buffer, bufsize); S_process_line(parser, parser->linebuf->ptr, parser->linebuf->size); cmark_strbuf_clear(parser->linebuf); } else { - S_process_line(parser, buffer, line_len); + S_process_line(parser, buffer, bufsize); } buffer += line_len; @@ -496,7 +536,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, static void chop_trailing_hashtags(cmark_chunk *ch) { - int n, orig_n; + bufsize_t n, orig_n; cmark_chunk_rtrim(ch); orig_n = n = ch->len - 1; @@ -513,29 +553,42 @@ static void chop_trailing_hashtags(cmark_chunk *ch) } static void -S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) +S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) +{ + parser->first_nonspace = parser->offset; + while (peek_at(input, parser->first_nonspace) == ' ') { + parser->first_nonspace++; + } + + parser->indent = parser->first_nonspace - parser->offset; + parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace)); +} + +static void +S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes) { cmark_node* last_matched_container; - int offset = 0; - int matched = 0; + bufsize_t matched = 0; int lev = 0; int i; cmark_list *data = NULL; bool all_matched = true; cmark_node* container; - bool blank = false; - int first_nonspace; - int indent; + bool indented; cmark_chunk input; bool maybe_lazy; utf8proc_detab(parser->curline, buffer, bytes); + parser->offset = 0; + parser->blank = false; // Add a newline to the end if not present: // TODO this breaks abstraction: - if (parser->curline->ptr[parser->curline->size - 1] != '\n') { + if (parser->curline->size > 0 && + !S_is_line_end_char(parser->curline->ptr[parser->curline->size - 1])) { cmark_strbuf_putc(parser->curline, '\n'); } + input.data = parser->curline->ptr; input.len = parser->curline->size; @@ -550,32 +603,26 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) while (container->last_child && container->last_child->open) { container = container->last_child; - first_nonspace = offset; - while (peek_at(&input, first_nonspace) == ' ') { - first_nonspace++; - } - - indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; + S_find_first_nonspace(parser, &input); if (container->type == NODE_BLOCK_QUOTE) { - matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; + matched = parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>'; if (matched) { - offset = first_nonspace + 1; - if (peek_at(&input, offset) == ' ') - offset++; + parser->offset = parser->first_nonspace + 1; + if (peek_at(&input, parser->offset) == ' ') + parser->offset++; } else { all_matched = false; } } else if (container->type == NODE_ITEM) { - if (indent >= container->as.list.marker_offset + + if (parser->indent >= container->as.list.marker_offset + container->as.list.padding) { - offset += container->as.list.marker_offset + + parser->offset += container->as.list.marker_offset + container->as.list.padding; - } else if (blank) { - offset = first_nonspace; + } else if (parser->blank) { + parser->offset = parser->first_nonspace; } else { all_matched = false; } @@ -583,34 +630,34 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } else if (container->type == NODE_CODE_BLOCK) { if (!container->as.code.fenced) { // indented - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = first_nonspace; + if (parser->indent >= CODE_INDENT) { + parser->offset += CODE_INDENT; + } else if (parser->blank) { + parser->offset = parser->first_nonspace; } else { all_matched = false; } } else { // fenced matched = 0; - if (indent <= 3 && - (peek_at(&input, first_nonspace) == + if (parser->indent <= 3 && + (peek_at(&input, parser->first_nonspace) == container->as.code.fence_char)) { matched = scan_close_code_fence(&input, - first_nonspace); + parser->first_nonspace); } if (matched >= container->as.code.fence_length) { // closing fence - and since we're at // the end of a line, we can return: all_matched = false; - offset += matched; + parser->offset += matched; parser->current = finalize(parser, container); goto finished; } else { - // skip opt. spaces of fence offset + // skip opt. spaces of fence parser->offset i = container->as.code.fence_offset; while (i > 0 && - peek_at(&input, offset) == ' ') { - offset++; + peek_at(&input, parser->offset) == ' ') { + parser->offset++; i--; } } @@ -622,13 +669,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } else if (container->type == NODE_HTML) { - if (blank) { + if (parser->blank) { all_matched = false; } } else if (container->type == NODE_PARAGRAPH) { - if (blank) { + if (parser->blank) { all_matched = false; } @@ -643,7 +690,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) last_matched_container = container; // check to see if we've hit 2nd blank line, break out of list: - if (blank && container->last_line_blank) { + if (parser->blank && container->last_line_blank) { break_out_of_lists(parser, &container); } @@ -652,40 +699,23 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) while (container->type != NODE_CODE_BLOCK && container->type != NODE_HTML) { - first_nonspace = offset; - while (peek_at(&input, first_nonspace) == ' ') - first_nonspace++; - - indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; - - if (indent >= CODE_INDENT) { - if (!maybe_lazy && !blank) { - offset += CODE_INDENT; - container = add_child(parser, container, NODE_CODE_BLOCK, offset + 1); - container->as.code.fenced = false; - container->as.code.fence_char = 0; - container->as.code.fence_length = 0; - container->as.code.fence_offset = 0; - container->as.code.info = cmark_chunk_literal(""); - } else { // indent > 4 in lazy line - break; - } + S_find_first_nonspace(parser, &input); + indented = parser->indent >= CODE_INDENT; - } else if (peek_at(&input, first_nonspace) == '>') { + if (!indented && peek_at(&input, parser->first_nonspace) == '>') { - offset = first_nonspace + 1; + parser->offset = parser->first_nonspace + 1; // optional following character - if (peek_at(&input, offset) == ' ') - offset++; - container = add_child(parser, container, NODE_BLOCK_QUOTE, offset + 1); + if (peek_at(&input, parser->offset) == ' ') + parser->offset++; + container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->offset + 1); - } else if ((matched = scan_atx_header_start(&input, first_nonspace))) { + } else if (!indented && (matched = scan_atx_header_start(&input, parser->first_nonspace))) { - offset = first_nonspace + matched; - container = add_child(parser, container, NODE_HEADER, offset + 1); + parser->offset = parser->first_nonspace + matched; + container = add_child(parser, container, NODE_HEADER, parser->offset + 1); - int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace); + bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); int level = 0; while (peek_at(&input, hashpos) == '#') { @@ -695,78 +725,95 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) container->as.header.level = level; container->as.header.setext = false; - } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { + } else if (!indented && (matched = scan_open_code_fence(&input, parser->first_nonspace))) { - container = add_child(parser, container, NODE_CODE_BLOCK, first_nonspace + 1); + container = add_child(parser, container, NODE_CODE_BLOCK, parser->first_nonspace + 1); container->as.code.fenced = true; - container->as.code.fence_char = peek_at(&input, first_nonspace); + container->as.code.fence_char = peek_at(&input, parser->first_nonspace); container->as.code.fence_length = matched; - container->as.code.fence_offset = first_nonspace - offset; + container->as.code.fence_offset = parser->first_nonspace - parser->offset; container->as.code.info = cmark_chunk_literal(""); - offset = first_nonspace + matched; + parser->offset = parser->first_nonspace + matched; - } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { + } else if (!indented && (matched = scan_html_block_tag(&input, parser->first_nonspace))) { - container = add_child(parser, container, NODE_HTML, first_nonspace + 1); - // note, we don't adjust offset because the tag is part of the text + container = add_child(parser, container, NODE_HTML, parser->first_nonspace + 1); + // note, we don't adjust parser->offset because the tag is part of the text - } else if (container->type == NODE_PARAGRAPH && - (lev = scan_setext_header_line(&input, first_nonspace)) && + } else if (!indented && + container->type == NODE_PARAGRAPH && + (lev = scan_setext_header_line(&input, parser->first_nonspace)) && // check that there is only one line in the paragraph: - cmark_strbuf_strrchr(&container->string_content, '\n', - cmark_strbuf_len(&container->string_content) - 2) < 0) { + (cmark_strbuf_strrchr(&container->string_content, '\n', + cmark_strbuf_len(&container->string_content) - 2) < 0)) { container->type = NODE_HEADER; container->as.header.level = lev; container->as.header.setext = true; - offset = input.len - 1; + parser->offset = input.len - 1; - } else if (!(container->type == NODE_PARAGRAPH && !all_matched) && - (matched = scan_hrule(&input, first_nonspace))) { + } else if (!indented && + !(container->type == NODE_PARAGRAPH && + !all_matched) && + (matched = scan_hrule(&input, parser->first_nonspace))) { // it's only now that we know the line is not part of a setext header: - container = add_child(parser, container, NODE_HRULE, first_nonspace + 1); + container = add_child(parser, container, NODE_HRULE, parser->first_nonspace + 1); container = finalize(parser, container); - offset = input.len - 1; + parser->offset = input.len - 1; - } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) { + } else if ((matched = parse_list_marker(&input, parser->first_nonspace, &data)) && + (!indented || container->type == NODE_LIST)) { + // Note that we can have new list items starting with >= 4 + // spaces indent, as long as the list container is still open. // compute padding: - offset = first_nonspace + matched; + parser->offset = parser->first_nonspace + matched; i = 0; - while (i <= 5 && peek_at(&input, offset + i) == ' ') { + while (i <= 5 && peek_at(&input, parser->offset + i) == ' ') { i++; } // i = number of spaces after marker, up to 5 - if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') { + if (i >= 5 || i < 1 || + S_is_line_end_char(peek_at(&input, parser->offset))) { data->padding = matched + 1; if (i > 0) { - offset += 1; + parser->offset += 1; } } else { data->padding = matched + i; - offset += i; + parser->offset += i; } // check container; if it's a list, see if this list item // can continue the list; otherwise, create a list container. - data->marker_offset = indent; + data->marker_offset = parser->indent; if (container->type != NODE_LIST || !lists_match(&container->as.list, data)) { container = add_child(parser, container, NODE_LIST, - first_nonspace + 1); + parser->first_nonspace + 1); memcpy(&container->as.list, data, sizeof(*data)); } // add the list item container = add_child(parser, container, NODE_ITEM, - first_nonspace + 1); + parser->first_nonspace + 1); /* TODO: static */ memcpy(&container->as.list, data, sizeof(*data)); free(data); + + } else if (indented && !maybe_lazy && !parser->blank) { + parser->offset += CODE_INDENT; + container = add_child(parser, container, NODE_CODE_BLOCK, parser->offset + 1); + container->as.code.fenced = false; + container->as.code.fence_char = 0; + container->as.code.fence_length = 0; + container->as.code.fence_offset = 0; + container->as.code.info = cmark_chunk_literal(""); + } else { break; } @@ -778,17 +825,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) maybe_lazy = false; } - // what remains at offset is a text line. add the text to the + // what remains at parser->offset is a text line. add the text to the // appropriate container. - first_nonspace = offset; - while (peek_at(&input, first_nonspace) == ' ') - first_nonspace++; + S_find_first_nonspace(parser, &input); - indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; - - if (blank && container->last_child) { + if (parser->blank && container->last_child) { container->last_child->last_line_blank = true; } @@ -796,7 +838,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // and we don't count blanks in fenced code for purposes of tight/loose // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. - container->last_line_blank = (blank && + container->last_line_blank = (parser->blank && container->type != NODE_BLOCK_QUOTE && container->type != NODE_HEADER && !(container->type == NODE_CODE_BLOCK && @@ -813,11 +855,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) if (parser->current != last_matched_container && container == last_matched_container && - !blank && + !parser->blank && parser->current->type == NODE_PARAGRAPH && cmark_strbuf_len(&parser->current->string_content) > 0) { - add_line(parser->current, &input, offset); + add_line(parser->current, &input, parser->offset); } else { // not a lazy continuation @@ -830,9 +872,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) if (container->type == NODE_CODE_BLOCK || container->type == NODE_HTML) { - add_line(container, &input, offset); + add_line(container, &input, parser->offset); - } else if (blank) { + } else if (parser->blank) { // ??? do nothing @@ -842,22 +884,26 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) container->as.header.setext == false) { chop_trailing_hashtags(&input); } - add_line(container, &input, first_nonspace); + add_line(container, &input, parser->first_nonspace); } else { // create paragraph container for line - container = add_child(parser, container, NODE_PARAGRAPH, first_nonspace + 1); - add_line(container, &input, first_nonspace); + container = add_child(parser, container, NODE_PARAGRAPH, parser->first_nonspace + 1); + add_line(container, &input, parser->first_nonspace); } parser->current = container; } finished: - parser->last_line_length = parser->curline->size - - (parser->curline->ptr[parser->curline->size - 1] == '\n' ? - 1 : 0); - ; + parser->last_line_length = parser->curline->size; + if (parser->last_line_length && + parser->curline->ptr[parser->last_line_length - 1] == '\n') + parser->last_line_length -= 1; + if (parser->last_line_length && + parser->curline->ptr[parser->last_line_length - 1] == '\r') + parser->last_line_length -= 1; + cmark_strbuf_clear(parser->curline); } diff --git a/buffer.c b/buffer.c index 5ec8b49..7d16af8 100644 --- a/buffer.c +++ b/buffer.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "config.h" #include "cmark_ctype.h" @@ -13,83 +14,88 @@ * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. */ unsigned char cmark_strbuf__initbuf[1]; -unsigned char cmark_strbuf__oom[1]; - -#define ENSURE_SIZE(b, d) \ - if ((d) > buf->asize && cmark_strbuf_grow(b, (d)) < 0) \ - return -1; #ifndef MIN #define MIN(x,y) ((xasize = 0; buf->size = 0; buf->ptr = cmark_strbuf__initbuf; - if (initial_size) + if (initial_size > 0) cmark_strbuf_grow(buf, initial_size); } -int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom) +void cmark_strbuf_overflow_err() { + fprintf(stderr, "String buffer overflow"); + abort(); +} + +static inline void +S_strbuf_grow_by(cmark_strbuf *buf, size_t add) { + size_t target_size = (size_t)buf->size + add; + + if (target_size < add /* Integer overflow. */ + || target_size > BUFSIZE_MAX /* Truncation overflow. */ + ) { + cmark_strbuf_overflow_err(); + return; /* unreachable */ + } + + if ((bufsize_t)target_size >= buf->asize) + cmark_strbuf_grow(buf, (bufsize_t)target_size); +} + +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { unsigned char *new_ptr; - int new_size; - - if (buf->ptr == cmark_strbuf__oom) - return -1; - if (target_size <= buf->asize) - return 0; + if (target_size < buf->asize) + return; if (buf->asize == 0) { - new_size = target_size; new_ptr = NULL; } else { - new_size = buf->asize; new_ptr = buf->ptr; } - /* grow the buffer size by 1.5, until it's big enough - * to fit our target size */ - while (new_size < target_size) - new_size = (new_size << 1) - (new_size >> 1); + /* Oversize the buffer by 50% to guarantee amortized linear time + * complexity on append operations. */ + size_t new_size = (size_t)target_size + (size_t)target_size / 2; + + /* Account for terminating null byte. */ + new_size += 1; /* round allocation up to multiple of 8 */ new_size = (new_size + 7) & ~7; + if (new_size < (size_t)target_size /* Integer overflow. */ + || new_size > BUFSIZE_MAX /* Truncation overflow. */ + ) { + if (target_size >= BUFSIZE_MAX) { + /* No space for terminating null byte. */ + cmark_strbuf_overflow_err(); + return; /* unreachable */ + } + /* Oversize by the maximum possible amount. */ + new_size = BUFSIZE_MAX; + } + new_ptr = (unsigned char *)realloc(new_ptr, new_size); if (!new_ptr) { - if (mark_oom) - buf->ptr = cmark_strbuf__oom; - return -1; + perror("realloc in cmark_strbuf_grow"); + abort(); } - buf->asize = new_size; + buf->asize = (bufsize_t)new_size; buf->ptr = new_ptr; - - /* truncate the existing buffer size if necessary */ - if (buf->size >= buf->asize) - buf->size = buf->asize - 1; - buf->ptr[buf->size] = '\0'; - - return 0; -} - -int cmark_strbuf_grow(cmark_strbuf *buf, int target_size) -{ - return cmark_strbuf_try_grow(buf, target_size, true); -} - -bool cmark_strbuf_oom(const cmark_strbuf *buf) -{ - return (buf->ptr == cmark_strbuf__oom); } -size_t cmark_strbuf_len(const cmark_strbuf *buf) +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } @@ -98,7 +104,7 @@ void cmark_strbuf_free(cmark_strbuf *buf) { if (!buf) return; - if (buf->ptr != cmark_strbuf__initbuf && buf->ptr != cmark_strbuf__oom) + if (buf->ptr != cmark_strbuf__initbuf) free(buf->ptr); cmark_strbuf_init(buf, 0); @@ -112,106 +118,106 @@ void cmark_strbuf_clear(cmark_strbuf *buf) buf->ptr[0] = '\0'; } -int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len) +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0 || data == NULL) { cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { - ENSURE_SIZE(buf, len + 1); + if (len >= buf->asize) + cmark_strbuf_grow(buf, len); memmove(buf->ptr, data, len); } buf->size = len; buf->ptr[buf->size] = '\0'; } - return 0; } -int cmark_strbuf_sets(cmark_strbuf *buf, const char *string) +void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { - return cmark_strbuf_set(buf, - (const unsigned char *)string, - string ? strlen(string) : 0); + cmark_strbuf_set(buf, (const unsigned char *)string, + string ? cmark_strbuf_safe_strlen(string) : 0); } -int cmark_strbuf_putc(cmark_strbuf *buf, int c) +void cmark_strbuf_putc(cmark_strbuf *buf, int c) { - ENSURE_SIZE(buf, buf->size + 2); + S_strbuf_grow_by(buf, 1); buf->ptr[buf->size++] = c; buf->ptr[buf->size] = '\0'; - return 0; } -int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len) +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0) - return 0; + return; - ENSURE_SIZE(buf, buf->size + len + 1); + S_strbuf_grow_by(buf, len); memmove(buf->ptr + buf->size, data, len); buf->size += len; buf->ptr[buf->size] = '\0'; - return 0; } -int cmark_strbuf_puts(cmark_strbuf *buf, const char *string) +void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { - return cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); + cmark_strbuf_put(buf, (const unsigned char *)string, + cmark_strbuf_safe_strlen(string)); } -int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) +void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) { - const int expected_size = buf->size + (strlen(format) * 2); - int len; - - ENSURE_SIZE(buf, expected_size); + size_t expected_size = strlen(format); + if (expected_size <= SIZE_MAX / 2) + expected_size *= 2; + S_strbuf_grow_by(buf, expected_size); while (1) { va_list args; va_copy(args, ap); - len = vsnprintf( + int len = vsnprintf( (char *)buf->ptr + buf->size, buf->asize - buf->size, format, args ); +#ifndef HAVE_C99_SNPRINTF + // Assume we're on Windows. + if (len < 0) { + len = _vscprintf(format, args); + } +#endif va_end(args); if (len < 0) { - free(buf->ptr); - buf->ptr = cmark_strbuf__oom; - return -1; + perror("vsnprintf in cmark_strbuf_vprintf"); + abort(); } - if (len + 1 <= buf->asize - buf->size) { + if ((size_t)len < (size_t)(buf->asize - buf->size)) { buf->size += len; break; } - ENSURE_SIZE(buf, buf->size + len + 1); + S_strbuf_grow_by(buf, len); } - - return 0; } -int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) +void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) { - int r; va_list ap; va_start(ap, format); - r = cmark_strbuf_vprintf(buf, format, ap); + cmark_strbuf_vprintf(buf, format, ap); va_end(ap); - - return r; } -void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf) +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf) { - int copylen; + bufsize_t copylen; - assert(data && datasize && buf); + assert(buf); + if (!data || datasize <= 0) + return; data[0] = '\0'; @@ -236,7 +242,7 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { unsigned char *data = buf->ptr; - if (buf->asize == 0 || buf->ptr == cmark_strbuf__oom) { + if (buf->asize == 0) { /* return an empty string */ return (unsigned char *)calloc(1, 1); } @@ -245,22 +251,6 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) return data; } -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize) -{ - cmark_strbuf_free(buf); - - if (ptr) { - buf->ptr = ptr; - buf->size = strlen((char *)ptr); - if (asize) - buf->asize = (asize < buf->size) ? buf->size + 1 : asize; - else /* pass 0 to fall back on strlen + 1 */ - buf->asize = buf->size + 1; - } else { - cmark_strbuf_grow(buf, asize); - } -} - int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); @@ -268,20 +258,28 @@ int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; } -int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos) +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { + if (pos >= buf->size) + return -1; + if (pos < 0) + pos = 0; + const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); if (!p) return -1; - return (int)(p - (const unsigned char *)buf->ptr); + return (bufsize_t)(p - (const unsigned char *)buf->ptr); } -int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos) +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { - int i; + if (pos < 0 || buf->size == 0) + return -1; + if (pos >= buf->size) + pos = buf->size - 1; - for (i = pos; i >= 0; i--) { + for (bufsize_t i = pos; i >= 0; i--) { if (buf->ptr[i] == (unsigned char) c) return i; } @@ -289,17 +287,22 @@ int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos) return -1; } -void cmark_strbuf_truncate(cmark_strbuf *buf, int len) +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { + if (len < 0) + len = 0; + if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } } -void cmark_strbuf_drop(cmark_strbuf *buf, int n) +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { if (n > 0) { + if (n > buf->size) + n = buf->size; buf->size = buf->size - n; if (buf->size) memmove(buf->ptr, buf->ptr + n, buf->size); @@ -325,7 +328,7 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf) void cmark_strbuf_trim(cmark_strbuf *buf) { - int i = 0; + bufsize_t i = 0; if (!buf->size) return; @@ -343,7 +346,7 @@ void cmark_strbuf_trim(cmark_strbuf *buf) void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { bool last_char_was_space = false; - int r, w; + bufsize_t r, w; for (r = 0, w = 0; r < s->size; ++r) { switch (s->ptr[r]) { @@ -368,7 +371,7 @@ void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) // Destructively unescape a string: remove backslashes before punctuation chars. extern void cmark_strbuf_unescape(cmark_strbuf *buf) { - int r, w; + bufsize_t r, w; for (r = 0, w = 0; r < buf->size; ++r) { if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) diff --git a/buffer.h b/buffer.h index fb9f910..babd051 100644 --- a/buffer.h +++ b/buffer.h @@ -3,22 +3,25 @@ #include #include +#include +#include #include "config.h" #ifdef __cplusplus extern "C" { #endif +typedef int bufsize_t; + typedef struct { unsigned char *ptr; - int asize, size; + bufsize_t asize, size; } cmark_strbuf; extern unsigned char cmark_strbuf__initbuf[]; -extern unsigned char cmark_strbuf__oom[]; - #define GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 } +#define BUFSIZE_MAX INT_MAX /** * Initialize a cmark_strbuf structure. @@ -26,51 +29,22 @@ extern unsigned char cmark_strbuf__oom[]; * For the cases where GH_BUF_INIT cannot be used to do static * initialization. */ -void cmark_strbuf_init(cmark_strbuf *buf, int initial_size); - -/** - * Attempt to grow the buffer to hold at least `target_size` bytes. - * - * If the allocation fails, this will return an error. If mark_oom is true, - * this will mark the buffer as invalid for future operations; if false, - * existing buffer content will be preserved, but calling code must handle - * that buffer was not expanded. - */ -int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom); +void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. - * - * If the allocation fails, this will return an error and the buffer will be - * marked as invalid for future operations, invaliding contents. - * - * @return 0 on success or -1 on failure */ -int cmark_strbuf_grow(cmark_strbuf *buf, int target_size); +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); void cmark_strbuf_free(cmark_strbuf *buf); void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); -/** - * Test if there have been any reallocation failures with this cmark_strbuf. - * - * Any function that writes to a cmark_strbuf can fail due to memory allocation - * issues. If one fails, the cmark_strbuf will be marked with an OOM error and - * further calls to modify the buffer will fail. Check cmark_strbuf_oom() at the - * end of your sequence and it will be true if you ran out of memory at any - * point with that buffer. - * - * @return false if no error, true if allocation error - */ -bool cmark_strbuf_oom(const cmark_strbuf *buf); - -size_t cmark_strbuf_len(const cmark_strbuf *buf); +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize); unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); -void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf); +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { @@ -79,33 +53,41 @@ static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) -/* - * Functions below that return int value error codes will return 0 on - * success or -1 on failure (which generally means an allocation failed). - * Using a cmark_strbuf where the allocation has failed with result in -1 from - * all further calls using that buffer. As a result, you can ignore the - * return code of these functions and call them in a series then just call - * cmark_strbuf_oom at the end. - */ -int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len); -int cmark_strbuf_sets(cmark_strbuf *buf, const char *string); -int cmark_strbuf_putc(cmark_strbuf *buf, int c); -int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len); -int cmark_strbuf_puts(cmark_strbuf *buf, const char *string); -int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) -CMARK_ATTRIBUTE((format (printf, 2, 3))); -int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap); +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); +void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); +void cmark_strbuf_putc(cmark_strbuf *buf, int c); +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); +void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); +void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) + CMARK_ATTRIBUTE((format (printf, 2, 3))); +void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap); void cmark_strbuf_clear(cmark_strbuf *buf); -int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos); -int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos); -void cmark_strbuf_drop(cmark_strbuf *buf, int n); -void cmark_strbuf_truncate(cmark_strbuf *buf, int len); +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); void cmark_strbuf_rtrim(cmark_strbuf *buf); void cmark_strbuf_trim(cmark_strbuf *buf); void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); void cmark_strbuf_unescape(cmark_strbuf *s); +/* Print error and abort. */ +void cmark_strbuf_overflow_err(void); + +static inline bufsize_t +cmark_strbuf_check_bufsize(size_t size) { + if (size > BUFSIZE_MAX) { + cmark_strbuf_overflow_err(); + } + return (bufsize_t)size; +} + +static inline bufsize_t +cmark_strbuf_safe_strlen(const char *str) { + return cmark_strbuf_check_bufsize(strlen(str)); +} + #ifdef __cplusplus } #endif diff --git a/chunk.h b/chunk.h index 54c4b16..f23a02d 100644 --- a/chunk.h +++ b/chunk.h @@ -7,10 +7,12 @@ #include "cmark_ctype.h" #include "buffer.h" +#define CMARK_CHUNK_EMPTY { NULL, 0, 0 } + typedef struct { unsigned char *data; - int len; - int alloc; // also implies a NULL-terminated string + bufsize_t len; + bufsize_t alloc; // also implies a NULL-terminated string } cmark_chunk; static inline void cmark_chunk_free(cmark_chunk *c) @@ -49,10 +51,10 @@ static inline void cmark_chunk_trim(cmark_chunk *c) cmark_chunk_rtrim(c); } -static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset) +static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset) { const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); - return p ? (int)(p - ch->data) : ch->len; + return p ? (bufsize_t)(p - ch->data) : ch->len; } static inline const char *cmark_chunk_to_cstr(cmark_chunk *c) @@ -64,7 +66,9 @@ static inline const char *cmark_chunk_to_cstr(cmark_chunk *c) } str = (unsigned char *)malloc(c->len + 1); if(str != NULL) { - memcpy(str, c->data, c->len); + if(c->len > 0) { + memcpy(str, c->data, c->len); + } str[c->len] = 0; } c->data = str; @@ -78,19 +82,26 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) if (c->alloc) { free(c->data); } - c->len = strlen(str); - c->data = (unsigned char *)malloc(c->len + 1); - c->alloc = 1; - memcpy(c->data, str, c->len + 1); + if (str == NULL) { + c->len = 0; + c->data = NULL; + c->alloc = 0; + } else { + c->len = cmark_strbuf_safe_strlen(str); + c->data = (unsigned char *)malloc(c->len + 1); + c->alloc = 1; + memcpy(c->data, str, c->len + 1); + } } static inline cmark_chunk cmark_chunk_literal(const char *data) { - cmark_chunk c = {(unsigned char *)data, data ? strlen(data) : 0, 0}; + bufsize_t len = data ? cmark_strbuf_safe_strlen(data) : 0; + cmark_chunk c = {(unsigned char *)data, len, 0}; return c; } -static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len) +static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len) { cmark_chunk c = {ch->data + pos, len, 0}; return c; diff --git a/cmark.c b/cmark.c index 79ceabf..35765b1 100644 --- a/cmark.c +++ b/cmark.c @@ -9,7 +9,7 @@ const int cmark_version = CMARK_VERSION; const char cmark_version_string[] = CMARK_VERSION_STRING; -char *cmark_markdown_to_html(const char *text, int len, int options) +char *cmark_markdown_to_html(const char *text, size_t len, int options) { cmark_node *doc; char *result; diff --git a/cmark.h b/cmark.h index e41d995..a7670e9 100644 --- a/cmark.h +++ b/cmark.h @@ -24,7 +24,7 @@ extern "C" { * UTF-8-encoded string. */ CMARK_EXPORT -char *cmark_markdown_to_html(const char *text, int len, int options); +char *cmark_markdown_to_html(const char *text, size_t len, int options); /** ## Node Structure */ diff --git a/commonmark.c b/commonmark.c index 805f139..4594748 100644 --- a/commonmark.c +++ b/commonmark.c @@ -20,7 +20,7 @@ struct render_state { int column; int width; int need_cr; - int last_breakable; + bufsize_t last_breakable; bool begin_line; bool no_wrap; bool in_tight_list_item; @@ -237,28 +237,31 @@ shortest_unused_backtick_sequence(cmark_chunk *code) static bool is_autolink(cmark_node *node) { - const char *title; - const char *url; + cmark_chunk *title; + cmark_chunk *url; + cmark_node *link_text; if (node->type != CMARK_NODE_LINK) { return false; } - url = cmark_node_get_url(node); - if (url == NULL || - _scan_scheme((unsigned char *)url) == 0) { + url = &node->as.link.url; + if (url->len == 0 || scan_scheme(url, 0) == 0) { return false; } - title = cmark_node_get_title(node); + title = &node->as.link.title; // if it has a title, we can't treat it as an autolink: - if (title != NULL && strlen(title) > 0) { + if (title->len > 0) { return false; } - cmark_consolidate_text_nodes(node); - return (strncmp(url, - (char*)node->as.literal.data, - node->as.literal.len) == 0); + + link_text = node->first_child; + cmark_consolidate_text_nodes(link_text); + return (url->len == link_text->as.literal.len && + strncmp((char*)url->data, + (char*)link_text->as.literal.data, + link_text->as.literal.len) == 0); } // if node is a block node, returns node. @@ -285,11 +288,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, int numticks; int i; bool entering = (ev_type == CMARK_EVENT_ENTER); - const char *info; - const char *title; + cmark_chunk *info; + cmark_chunk *title; cmark_strbuf listmarker = GH_BUF_INIT; char *emph_delim; - int marker_width; + bufsize_t marker_width; // Don't adjust tight list status til we've started the list. // Otherwise we loose the blank line between a paragraph and @@ -392,12 +395,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_CODE_BLOCK: blankline(state); - info = cmark_node_get_fence_info(node); + info = &node->as.code.info; code = &node->as.code.literal; // use indented form if no info, and code doesn't // begin or end with a blank line, and code isn't // first thing in a list item - if ((info == NULL || strlen(info) == 0) && + if (info->len == 0 && (code->len > 2 && !isspace(code->data[0]) && !(isspace(code->data[code->len - 1]) && @@ -418,7 +421,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, lit(state, "`", false); } lit(state, " ", false); - out(state, cmark_chunk_literal(info), false, LITERAL); + out(state, *info, false, LITERAL); cr(state); out(state, node->as.code.literal, false, LITERAL); cr(state); @@ -538,11 +541,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL); - title = cmark_node_get_title(node); - if (title && strlen(title) > 0) { + title = &node->as.link.title; + if (title->len > 0) { lit(state, " \"", true); - out(state, cmark_chunk_literal(title), - false, TITLE); + out(state, *title, false, TITLE); lit(state, "\"", false); } lit(state, ")", false); @@ -556,10 +558,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, } else { lit(state, "](", false); out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL); - title = cmark_node_get_title(node); - if (title && strlen(title) > 0) { + title = &node->as.link.title; + if (title->len > 0) { lit(state, " \"", true); - out(state, cmark_chunk_literal(title), false, TITLE); + out(state, *title, false, TITLE); lit(state, "\"", false); } lit(state, ")", false); diff --git a/commonmark.go b/commonmark.go index df479ed..aff8eb7 100644 --- a/commonmark.go +++ b/commonmark.go @@ -24,7 +24,7 @@ func Md2Html(mdtext string, options int) string { mdtext += "\n" } mdCstr := C.CString(mdtext) - strLen := C.int(len(mdtext)) + strLen := C.size_t(len(mdtext)) defer C.free(unsafe.Pointer(mdCstr)) htmlString := C.cmark_markdown_to_html(mdCstr, strLen, C.int(options)) defer C.free(unsafe.Pointer(htmlString)) diff --git a/config.h b/config.h index 3de8c11..8eb09a1 100644 --- a/config.h +++ b/config.h @@ -21,3 +21,5 @@ #ifndef HAVE_VA_COPY #define va_copy(dest, src) ((dest) = (src)) #endif + +#define HAVE_C99_SNPRINTF diff --git a/houdini.h b/houdini.h index 9f00f6d..b926cf3 100644 --- a/houdini.h +++ b/houdini.h @@ -31,19 +31,12 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) -extern size_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure); -extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); +extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure); +extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); #ifdef __cplusplus } diff --git a/houdini_href_e.c b/houdini_href_e.c index 7527780..7fb958a 100644 --- a/houdini_href_e.c +++ b/houdini_href_e.c @@ -49,10 +49,10 @@ static const char HREF_SAFE[] = { }; int -houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { static const uint8_t hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; + bufsize_t i = 0, org; uint8_t hex_str[3]; hex_str[0] = '%'; diff --git a/houdini_html_e.c b/houdini_html_e.c index 1a4c3e1..7f4b91f 100644 --- a/houdini_html_e.c +++ b/houdini_html_e.c @@ -45,9 +45,9 @@ static const char *HTML_ESCAPES[] = { }; int -houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure) +houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure) { - size_t i = 0, org, esc = 0; + bufsize_t i = 0, org, esc = 0; while (i < size) { org = i; @@ -75,7 +75,7 @@ houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secu } int -houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { return houdini_escape_html0(ob, src, size, 1); } diff --git a/houdini_html_u.c b/houdini_html_u.c index 2cb14b4..e57894d 100644 --- a/houdini_html_u.c +++ b/houdini_html_u.c @@ -7,37 +7,50 @@ #include "utf8.h" #include "html_unescape.h" -size_t -houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) +bufsize_t +houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0; + bufsize_t i = 0; - if (size > 3 && src[0] == '#') { - int codepoint = 0; + if (size >= 3 && src[0] == '#') { + int codepoint = 0; + int num_digits = 0; if (_isdigit(src[1])) { for (i = 1; i < size && _isdigit(src[i]); ++i) { - int cp = (codepoint * 10) + (src[i] - '0'); + codepoint = (codepoint * 10) + (src[i] - '0'); - if (cp < codepoint) - return 0; - - codepoint = cp; + if (codepoint >= 0x110000) { + // Keep counting digits but + // avoid integer overflow. + codepoint = 0x110000; + } } + + num_digits = i - 1; } else if (src[1] == 'x' || src[1] == 'X') { for (i = 2; i < size && _isxdigit(src[i]); ++i) { - int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9); + codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); - if (cp < codepoint) - return 0; - - codepoint = cp; + if (codepoint >= 0x110000) { + // Keep counting digits but + // avoid integer overflow. + codepoint = 0x110000; + } } + + num_digits = i - 2; } - if (i < size && src[i] == ';' && codepoint) { + if (num_digits >= 1 && num_digits <= 8 && + i < size && src[i] == ';') { + if (codepoint == 0 || + (codepoint >= 0xD800 && codepoint < 0xE000) || + codepoint >= 0x110000) { + codepoint = 0xFFFD; + } utf8proc_encode_char(codepoint, ob); return i + 1; } @@ -55,7 +68,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) const struct html_ent *entity = find_entity((char *)src, i); if (entity != NULL) { - int len = 0; + bufsize_t len = 0; while (len < 4 && entity->utf8[len] != '\0') { ++len; } @@ -72,9 +85,9 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) } int -houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0, org, ent; + bufsize_t i = 0, org, ent; while (i < size) { org = i; @@ -109,7 +122,7 @@ houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) return 1; } -void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size) +void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { if (!houdini_unescape_html(ob, src, size)) cmark_strbuf_put(ob, src, size); diff --git a/html.c b/html.c index f1b88fa..a30bbca 100644 --- a/html.c +++ b/html.c @@ -11,20 +11,9 @@ // Functions to convert cmark_nodes to HTML strings. -static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { - if (length < 0) - length = strlen((char *)source); - - houdini_escape_html0(dest, source, (size_t)length, 0); -} - -static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length) -{ - if (length < 0) - length = strlen((char *)source); - - houdini_escape_href(dest, source, (size_t)length); + houdini_escape_html0(dest, source, length, 0); } static inline void cr(cmark_strbuf *html) @@ -165,7 +154,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, S_render_sourcepos(node, html, options); cmark_strbuf_puts(html, ">"); } else { - int first_tag = 0; + bufsize_t first_tag = 0; while (first_tag < node->as.code.info.len && node->as.code.info.data[first_tag] != ' ') { first_tag += 1; @@ -261,12 +250,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "as.link.url) - escape_href(html, node->as.link.url, -1); + houdini_escape_href(html, node->as.link.url.data, + node->as.link.url.len); - if (node->as.link.title) { + if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); - escape_html(html, node->as.link.title, -1); + escape_html(html, node->as.link.title.data, + node->as.link.title.len); } cmark_strbuf_puts(html, "\">"); @@ -278,15 +268,16 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "as.link.url) - escape_href(html, node->as.link.url, -1); + houdini_escape_href(html, node->as.link.url.data, + node->as.link.url.len); cmark_strbuf_puts(html, "\" alt=\""); state->plain = node; } else { - if (node->as.link.title) { + if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); - escape_html(html, node->as.link.title, -1); + escape_html(html, node->as.link.title.data, + node->as.link.title.len); } cmark_strbuf_puts(html, "\" />"); diff --git a/inlines.c b/inlines.c index afe564c..7e8f806 100644 --- a/inlines.c +++ b/inlines.c @@ -36,7 +36,7 @@ typedef struct delimiter { struct delimiter *previous; struct delimiter *next; cmark_node *inl_text; - int position; + bufsize_t position; unsigned char delim_char; bool can_open; bool can_close; @@ -45,7 +45,7 @@ typedef struct delimiter { typedef struct { cmark_chunk input; - int pos; + bufsize_t pos; cmark_reference_map *refmap; delimiter *last_delim; } subject; @@ -57,33 +57,35 @@ static int parse_inline(subject* subj, cmark_node * parent, int options); static void subject_from_buf(subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); -static int subject_find_special_char(subject *subj, int options); +static bufsize_t subject_find_special_char(subject *subj, int options); -static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email) +static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) { cmark_strbuf buf = GH_BUF_INIT; cmark_chunk_trim(url); - if (url->len == 0) - return NULL; + if (url->len == 0) { + cmark_chunk result = CMARK_CHUNK_EMPTY; + return result; + } if (is_email) cmark_strbuf_puts(&buf, "mailto:"); houdini_unescape_html_f(&buf, url->data, url->len); - return cmark_strbuf_detach(&buf); + return cmark_chunk_buf_detach(&buf); } -static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title) +static inline cmark_node *make_link(cmark_node *label, cmark_chunk *url, cmark_chunk *title) { cmark_node* e = (cmark_node *)calloc(1, sizeof(*e)); if(e != NULL) { e->type = CMARK_NODE_LINK; e->first_child = label; e->last_child = label; - e->as.link.url = url; - e->as.link.title = title; + e->as.link.url = *url; + e->as.link.title = *title; e->next = NULL; label->parent = e; } @@ -92,7 +94,9 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email) { - return make_link(label, cmark_clean_autolink(&url, is_email), NULL); + cmark_chunk clean_url = cmark_clean_autolink(&url, is_email); + cmark_chunk title = CMARK_CHUNK_EMPTY; + return make_link(label, &clean_url, &title); } // Create an inline with a literal string value. @@ -134,19 +138,20 @@ static inline cmark_node* make_simple(cmark_node_type t) return e; } -static unsigned char *bufdup(const unsigned char *buf) +// Duplicate a chunk by creating a copy of the buffer not by reusing the +// buffer like cmark_chunk_dup does. +static cmark_chunk chunk_clone(cmark_chunk *src) { - unsigned char *new_buf = NULL; + cmark_chunk c; + bufsize_t len = src->len; - if (buf) { - int len = strlen((char *)buf); - new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf)); - if(new_buf != NULL) { - memcpy(new_buf, buf, len + 1); - } - } + c.len = len; + c.data = (unsigned char *)malloc(len + 1); + c.alloc = 1; + memcpy(c.data, src->data, len); + c.data[len] = '\0'; - return new_buf; + return c; } static void subject_from_buf(subject *e, cmark_strbuf *buffer, @@ -172,7 +177,7 @@ static inline unsigned char peek_char(subject *subj) return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; } -static inline unsigned char peek_at(subject *subj, int pos) +static inline unsigned char peek_at(subject *subj, bufsize_t pos) { return subj->input.data[pos]; } @@ -190,8 +195,8 @@ static inline int is_eof(subject* subj) static inline cmark_chunk take_while(subject* subj, int (*f)(int)) { unsigned char c; - int startpos = subj->pos; - int len = 0; + bufsize_t startpos = subj->pos; + bufsize_t len = 0; while ((c = peek_char(subj)) && (*f)(c)) { advance(subj); @@ -206,7 +211,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int)) // parsed). Return 0 if you don't find matching closing // backticks, otherwise return the position in the subject // after the closing backticks. -static int scan_to_closing_backticks(subject* subj, int openticklength) +static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength) { // read non backticks unsigned char c; @@ -216,7 +221,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) if (is_eof(subj)) { return 0; // did not find closing ticks, return 0 } - int numticks = 0; + bufsize_t numticks = 0; while (peek_char(subj) == '`') { advance(subj); numticks++; @@ -232,8 +237,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) static cmark_node* handle_backticks(subject *subj) { cmark_chunk openticks = take_while(subj, isbacktick); - int startpos = subj->pos; - int endpos = scan_to_closing_backticks(subj, openticks.len); + bufsize_t startpos = subj->pos; + bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind @@ -255,7 +260,7 @@ static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) { int numdelims = 0; - int before_char_pos; + bufsize_t before_char_pos; int32_t after_char = 0; int32_t before_char = 0; int len; @@ -302,9 +307,9 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) !utf8proc_is_punctuation(after_char)); if (c == '_') { *can_open = left_flanking && - (!right_flanking || utf8proc_is_punctuation(before_char)); + (!right_flanking || utf8proc_is_punctuation(before_char)); *can_close = right_flanking && - (!left_flanking || utf8proc_is_punctuation(after_char)); + (!left_flanking || utf8proc_is_punctuation(after_char)); } else if (c == '\'' || c == '"') { *can_open = left_flanking && !right_flanking; *can_close = right_flanking; @@ -371,7 +376,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, // Assumes the subject has a c at the current position. static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart) { - int numdelims; + bufsize_t numdelims; cmark_node * inl_text; bool can_open, can_close; cmark_chunk contents; @@ -495,11 +500,11 @@ static delimiter* S_insert_emph(subject *subj, delimiter *opener, delimiter *closer) { delimiter *delim, *tmp_delim; - int use_delims; + bufsize_t use_delims; cmark_node *opener_inl = opener->inl_text; cmark_node *closer_inl = closer->inl_text; - int opener_num_chars = opener_inl->as.literal.len; - int closer_num_chars = closer_inl->as.literal.len; + bufsize_t opener_num_chars = opener_inl->as.literal.len; + bufsize_t closer_num_chars = closer_inl->as.literal.len; cmark_node *tmp, *emph, *first_child, *last_child; // calculate the actual number of characters used from this closer @@ -578,7 +583,7 @@ static cmark_node* handle_backslash(subject *subj) if (cmark_ispunct(nextchar)) { // only ascii symbols and newline can be escaped advance(subj); return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); - } else if (nextchar == '\n') { + } else if (nextchar == '\r' || nextchar == '\n') { advance(subj); return make_linebreak(); } else { @@ -591,7 +596,7 @@ static cmark_node* handle_backslash(subject *subj) static cmark_node* handle_entity(subject* subj) { cmark_strbuf ent = GH_BUF_INIT; - size_t len; + bufsize_t len; advance(subj); @@ -613,7 +618,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content) { cmark_strbuf unescaped = GH_BUF_INIT; - if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + if (houdini_unescape_html(&unescaped, content->data, content->len)) { return make_str(cmark_chunk_buf_detach(&unescaped)); } else { return make_str(*content); @@ -622,14 +627,16 @@ static cmark_node *make_str_with_entities(cmark_chunk *content) // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -unsigned char *cmark_clean_url(cmark_chunk *url) +cmark_chunk cmark_clean_url(cmark_chunk *url) { cmark_strbuf buf = GH_BUF_INIT; cmark_chunk_trim(url); - if (url->len == 0) - return NULL; + if (url->len == 0) { + cmark_chunk result = CMARK_CHUNK_EMPTY; + return result; + } if (url->data[0] == '<' && url->data[url->len - 1] == '>') { houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); @@ -638,16 +645,18 @@ unsigned char *cmark_clean_url(cmark_chunk *url) } cmark_strbuf_unescape(&buf); - return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf); + return cmark_chunk_buf_detach(&buf); } -unsigned char *cmark_clean_title(cmark_chunk *title) +cmark_chunk cmark_clean_title(cmark_chunk *title) { cmark_strbuf buf = GH_BUF_INIT; unsigned char first, last; - if (title->len == 0) - return NULL; + if (title->len == 0) { + cmark_chunk result = CMARK_CHUNK_EMPTY; + return result; + } first = title->data[0]; last = title->data[title->len - 1]; @@ -662,14 +671,14 @@ unsigned char *cmark_clean_title(cmark_chunk *title) } cmark_strbuf_unescape(&buf); - return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf); + return cmark_chunk_buf_detach(&buf); } // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. static cmark_node* handle_pointy_brace(subject* subj) { - int matchlen = 0; + bufsize_t matchlen = 0; cmark_chunk contents; advance(subj); // advance past first < @@ -716,7 +725,7 @@ static cmark_node* handle_pointy_brace(subject* subj) // encountered. Backticks in labels do not start code spans. static int link_label(subject* subj, cmark_chunk *raw_label) { - int startpos = subj->pos; + bufsize_t startpos = subj->pos; int length = 0; unsigned char c; @@ -746,6 +755,7 @@ static int link_label(subject* subj, cmark_chunk *raw_label) if (c == ']') { // match found *raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); + cmark_chunk_trim(raw_label); advance(subj); // advance past ] return 1; } @@ -759,14 +769,14 @@ static int link_label(subject* subj, cmark_chunk *raw_label) // Return a link, an image, or a literal close bracket. static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) { - int initial_pos; - int starturl, endurl, starttitle, endtitle, endall; - int n; - int sps; + bufsize_t initial_pos; + bufsize_t starturl, endurl, starttitle, endtitle, endall; + bufsize_t n; + bufsize_t sps; cmark_reference *ref; bool is_image = false; cmark_chunk url_chunk, title_chunk; - unsigned char *url, *title; + cmark_chunk url, title; delimiter *opener; cmark_node *link_text; cmark_node *inl; @@ -854,8 +864,8 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) cmark_chunk_free(&raw_label); if (ref != NULL) { // found - url = bufdup(ref->url); - title = bufdup(ref->title); + url = chunk_clone(&ref->url); + title = chunk_clone(&ref->title); goto match; } else { goto noMatch; @@ -912,7 +922,7 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) // Assumes the subject has a newline at the current position. static cmark_node* handle_newline(subject *subj) { - int nlpos = subj->pos; + bufsize_t nlpos = subj->pos; // skip over newline advance(subj); // skip spaces at beginning of line @@ -928,11 +938,11 @@ static cmark_node* handle_newline(subject *subj) } } -static int subject_find_special_char(subject *subj, int options) +static bufsize_t subject_find_special_char(subject *subj, int options) { - // "\n\\`&_*[]pos + 1; + bufsize_t n = subj->pos + 1; while (n < subj->input.len) { if (SPECIAL_CHARS[subj->input.data[n]]) @@ -991,12 +1001,13 @@ static int parse_inline(subject* subj, cmark_node * parent, int options) cmark_node* new_inl = NULL; cmark_chunk contents; unsigned char c; - int endpos; + bufsize_t endpos; c = peek_char(subj); if (c == 0) { return 0; } switch(c) { + case '\r': case '\n': new_inl = handle_newline(subj); break; @@ -1048,7 +1059,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options) subj->pos = endpos; // if we're at a newline, strip trailing spaces. - if (peek_char(subj) == '\n') { + if (peek_char(subj) == '\r' || peek_char(subj) == '\n') { cmark_chunk_rtrim(&contents); } @@ -1078,7 +1089,7 @@ static void spnl(subject* subj) bool seen_newline = false; while (peek_char(subj) == ' ' || (!seen_newline && - (seen_newline = peek_char(subj) == '\n'))) { + (seen_newline = peek_char(subj) == '\r' || peek_char(subj) == '\n'))) { advance(subj); } } @@ -1087,7 +1098,7 @@ static void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) { subject subj; @@ -1095,13 +1106,13 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma cmark_chunk url; cmark_chunk title; - int matchlen = 0; - int beforetitle; + bufsize_t matchlen = 0; + bufsize_t beforetitle; subject_from_buf(&subj, input, NULL); // parse label: - if (!link_label(&subj, &lab)) + if (!link_label(&subj, &lab) || lab.len == 0) return 0; // colon: @@ -1136,7 +1147,7 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma while (peek_char(&subj) == ' ') { advance(&subj); } - if (peek_char(&subj) == '\n') { + if (peek_char(&subj) == '\r' || peek_char(&subj) == '\n') { advance(&subj); } else if (peek_char(&subj) != 0) { return 0; diff --git a/inlines.h b/inlines.h index 9e56790..f8847fc 100644 --- a/inlines.h +++ b/inlines.h @@ -5,12 +5,12 @@ extern "C" { #endif -unsigned char *cmark_clean_url(cmark_chunk *url); -unsigned char *cmark_clean_title(cmark_chunk *title); +cmark_chunk cmark_clean_url(cmark_chunk *url); +cmark_chunk cmark_clean_title(cmark_chunk *title); void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options); -int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); #ifdef __cplusplus } diff --git a/iterator.c b/iterator.c index c6faf99..f18e3bf 100644 --- a/iterator.c +++ b/iterator.c @@ -129,18 +129,20 @@ void cmark_consolidate_text_nodes(cmark_node *root) cur->next && cur->next->type == CMARK_NODE_TEXT) { cmark_strbuf_clear(&buf); - cmark_strbuf_puts(&buf, cmark_node_get_literal(cur)); + cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len); tmp = cur->next; while (tmp && tmp->type == CMARK_NODE_TEXT) { cmark_iter_next(iter); // advance pointer - cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp)); + cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len); next = tmp->next; cmark_node_free(tmp); tmp = next; } - cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf)); + cmark_chunk_free(&cur->as.literal); + cur->as.literal = cmark_chunk_buf_detach(&buf); } } + cmark_strbuf_free(&buf); cmark_iter_free(iter); } diff --git a/node.c b/node.c index 466b0a1..7b1bb10 100644 --- a/node.c +++ b/node.c @@ -122,12 +122,8 @@ void S_free_nodes(cmark_node *e) break; case NODE_LINK: case NODE_IMAGE: - if (e->as.link.url) { - free(e->as.link.url); - } - if (e->as.link.title) { - free(e->as.link.title); - } + cmark_chunk_free(&e->as.link.url); + cmark_chunk_free(&e->as.link.title); break; default: break; @@ -282,15 +278,6 @@ cmark_node_set_user_data(cmark_node *node, void *user_data) return 1; } -static char* -S_strdup(const char *str) -{ - size_t size = strlen(str) + 1; - char *dup = (char *)malloc(size); - memcpy(dup, str, size); - return dup; -} - const char* cmark_node_get_literal(cmark_node *node) { @@ -541,7 +528,7 @@ cmark_node_get_url(cmark_node *node) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - return (char *)node->as.link.url; + return cmark_chunk_to_cstr(&node->as.link.url); default: break; } @@ -559,8 +546,7 @@ cmark_node_set_url(cmark_node *node, const char *url) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - free(node->as.link.url); - node->as.link.url = (unsigned char *)S_strdup(url); + cmark_chunk_set_cstr(&node->as.link.url, url); return 1; default: break; @@ -579,7 +565,7 @@ cmark_node_get_title(cmark_node *node) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - return (char *)node->as.link.title; + return cmark_chunk_to_cstr(&node->as.link.title); default: break; } @@ -597,8 +583,7 @@ cmark_node_set_title(cmark_node *node, const char *title) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - free(node->as.link.title); - node->as.link.title = (unsigned char *)S_strdup(title); + cmark_chunk_set_cstr(&node->as.link.title, title); return 1; default: break; diff --git a/node.h b/node.h index 7a45d42..911a18f 100644 --- a/node.h +++ b/node.h @@ -38,8 +38,8 @@ typedef struct { } cmark_header; typedef struct { - unsigned char *url; - unsigned char *title; + cmark_chunk url; + cmark_chunk title; } cmark_link; struct cmark_node { diff --git a/parser.h b/parser.h index cbccae3..6e18c67 100644 --- a/parser.h +++ b/parser.h @@ -16,8 +16,12 @@ struct cmark_parser { struct cmark_node* root; struct cmark_node* current; int line_number; + bufsize_t offset; + bufsize_t first_nonspace; + int indent; + bool blank; cmark_strbuf *curline; - int last_line_length; + bufsize_t last_line_length; cmark_strbuf *linebuf; int options; }; diff --git a/references.c b/references.c index 37bf4cb..1d3d56d 100644 --- a/references.c +++ b/references.c @@ -20,8 +20,8 @@ static void reference_free(cmark_reference *ref) { if(ref != NULL) { free(ref->label); - free(ref->url); - free(ref->title); + cmark_chunk_free(&ref->url); + cmark_chunk_free(&ref->title); free(ref); } } diff --git a/references.h b/references.h index 69325bb..a360cd5 100644 --- a/references.h +++ b/references.h @@ -12,8 +12,8 @@ extern "C" { struct cmark_reference { struct cmark_reference *next; unsigned char *label; - unsigned char *url; - unsigned char *title; + cmark_chunk url; + cmark_chunk title; unsigned int hash; }; diff --git a/scanners.c b/scanners.c index 42b9275..3f4ddac 100644 --- a/scanners.c +++ b/scanners.c @@ -1,11 +1,11 @@ -/* Generated by re2c 0.13.6 */ +/* Generated by re2c 0.13.5 */ #include #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -578,7 +578,7 @@ int _scan_scheme(const unsigned char *p) if (yych != ':') goto yy31; yy35: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy37: yych = *++p; if (yych == 'E') goto yy38; @@ -2919,7 +2919,7 @@ int _scan_scheme(const unsigned char *p) } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -3517,7 +3517,7 @@ int _scan_autolink_uri(const unsigned char *p) } if (yych <= '=') goto yy516; ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy524: yych = *++p; if (yych == 'E') goto yy525; @@ -5858,7 +5858,7 @@ int _scan_autolink_uri(const unsigned char *p) } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -6060,7 +6060,7 @@ int _scan_autolink_email(const unsigned char *p) } yy985: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy987: ++p; yych = *p; @@ -10803,7 +10803,7 @@ int _scan_autolink_email(const unsigned char *p) } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -10964,7 +10964,7 @@ int _scan_html_tag(const unsigned char *p) if (yych != '>') goto yy1239; yy1243: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1245: yych = *++p; if (yych == 'C') goto yy1260; @@ -11455,7 +11455,7 @@ int _scan_html_tag(const unsigned char *p) // Try to match an HTML block tag including first <, // returning num of chars matched. -int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -11513,7 +11513,7 @@ int _scan_html_block_tag(const unsigned char *p) goto yy1301; yy1304: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1306: yych = *++p; if (yych <= '/') { @@ -12022,7 +12022,7 @@ int _scan_html_block_tag(const unsigned char *p) } yy1344: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1346: yych = *++p; if (yych <= 'R') { @@ -12639,7 +12639,7 @@ int _scan_html_block_tag(const unsigned char *p) } yy1467: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1469: yych = *++p; if (yych <= 'R') { @@ -13243,7 +13243,7 @@ int _scan_html_block_tag(const unsigned char *p) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13253,7 +13253,7 @@ int _scan_link_url(const unsigned char *p) unsigned int yyaccept = 0; static const unsigned char yybm[] = { 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 8, 64, 64, 64, 64, 64, + 64, 64, 8, 64, 64, 8, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 72, 112, 112, 112, 112, 112, 112, 112, @@ -13286,27 +13286,29 @@ int _scan_link_url(const unsigned char *p) 112, 112, 112, 112, 112, 112, 112, 112, }; yych = *p; - if (yych <= '(') { - if (yych <= 0x1F) { + if (yych <= '\'') { + if (yych <= '\f') { if (yych == '\n') goto yy1589; goto yy1597; } else { + if (yych <= '\r') goto yy1591; + if (yych <= 0x1F) goto yy1597; if (yych <= ' ') goto yy1591; - if (yych <= '\'') goto yy1593; - goto yy1596; + goto yy1593; } } else { - if (yych <= '<') { + if (yych <= ';') { + if (yych <= '(') goto yy1596; if (yych <= ')') goto yy1597; - if (yych <= ';') goto yy1593; - goto yy1592; + goto yy1593; } else { + if (yych <= '<') goto yy1592; if (yych == '\\') goto yy1594; goto yy1593; } } yy1588: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1589: yyaccept = 0; marker = ++p; @@ -13339,13 +13341,18 @@ int _scan_link_url(const unsigned char *p) if (yybm[0+yych] & 32) { goto yy1605; } - if (yych <= '\'') { - if (yych <= 0x00) goto yy1588; - if (yych == '\n') goto yy1588; - goto yy1612; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy1588; + goto yy1612; + } else { + if (yych <= '\n') goto yy1588; + if (yych <= '\f') goto yy1612; + goto yy1588; + } } else { if (yych <= ')') { - if (yych <= '(') goto yy1610; + if (yych == '(') goto yy1610; goto yy1612; } else { if (yych <= '=') goto yy1602; @@ -13395,7 +13402,7 @@ int _scan_link_url(const unsigned char *p) yy1600: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1588; } else { goto yy1595; @@ -13459,13 +13466,18 @@ int _scan_link_url(const unsigned char *p) if (yybm[0+yych] & 32) { goto yy1605; } - if (yych <= '\'') { - if (yych <= 0x00) goto yy1588; - if (yych == '\n') goto yy1588; - goto yy1612; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy1588; + goto yy1612; + } else { + if (yych <= '\n') goto yy1588; + if (yych <= '\f') goto yy1612; + goto yy1588; + } } else { if (yych <= ')') { - if (yych <= '(') goto yy1610; + if (yych == '(') goto yy1610; goto yy1612; } else { if (yych <= '=') goto yy1602; @@ -13478,48 +13490,57 @@ int _scan_link_url(const unsigned char *p) if (yych <= ' ') goto yy1608; if (yych != ')') goto yy1603; yy1608: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1609: ++p; yych = *p; - if (yych <= '>') { - if (yych <= ' ') { + if (yych <= '=') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; if (yych == '\n') goto yy1600; goto yy1612; } else { - if (yych <= '/') goto yy1605; - if (yych <= '9') goto yy1612; - if (yych <= '=') goto yy1605; - goto yy1622; + if (yych <= ' ') { + if (yych <= '\r') goto yy1600; + goto yy1612; + } else { + if (yych <= '/') goto yy1605; + if (yych <= '9') goto yy1612; + goto yy1605; + } } } else { - if (yych <= '\\') { + if (yych <= '[') { + if (yych <= '>') goto yy1622; if (yych <= '@') goto yy1605; if (yych <= 'Z') goto yy1612; - if (yych <= '[') goto yy1605; - goto yy1623; + goto yy1605; } else { - if (yych <= '`') goto yy1605; - if (yych <= 'z') goto yy1612; - if (yych <= '~') goto yy1605; - goto yy1612; + if (yych <= '`') { + if (yych <= '\\') goto yy1623; + goto yy1605; + } else { + if (yych <= 'z') goto yy1612; + if (yych <= '~') goto yy1605; + goto yy1612; + } } } yy1610: ++p; yych = *p; - if (yych <= ')') { - if (yych <= '\n') { + if (yych <= '(') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; - if (yych >= '\n') goto yy1600; + if (yych == '\n') goto yy1600; } else { + if (yych <= '\r') goto yy1600; if (yych <= ' ') goto yy1612; if (yych <= '\'') goto yy1610; - if (yych >= ')') goto yy1605; } } else { if (yych <= '=') { + if (yych <= ')') goto yy1605; if (yych == '<') goto yy1598; goto yy1610; } else { @@ -13545,11 +13566,12 @@ int _scan_link_url(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1615; } - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; - if (yych <= '\t') goto yy1612; - goto yy1600; + if (yych == '\n') goto yy1600; + goto yy1612; } else { + if (yych <= '\r') goto yy1600; if (yych != '>') goto yy1612; } yyaccept = 2; @@ -13570,46 +13592,56 @@ int _scan_link_url(const unsigned char *p) yy1619: ++p; yych = *p; - if (yych <= '>') { - if (yych <= ' ') { + if (yych <= '=') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; if (yych == '\n') goto yy1600; goto yy1612; } else { - if (yych <= '/') goto yy1610; - if (yych <= '9') goto yy1612; - if (yych <= '=') goto yy1610; + if (yych <= ' ') { + if (yych <= '\r') goto yy1600; + goto yy1612; + } else { + if (yych <= '/') goto yy1610; + if (yych <= '9') goto yy1612; + goto yy1610; + } } } else { - if (yych <= '\\') { + if (yych <= '[') { + if (yych <= '>') goto yy1620; if (yych <= '@') goto yy1610; if (yych <= 'Z') goto yy1612; - if (yych <= '[') goto yy1610; - goto yy1621; + goto yy1610; } else { - if (yych <= '`') goto yy1610; - if (yych <= 'z') goto yy1612; - if (yych <= '~') goto yy1610; - goto yy1612; + if (yych <= '`') { + if (yych <= '\\') goto yy1621; + goto yy1610; + } else { + if (yych <= 'z') goto yy1612; + if (yych <= '~') goto yy1610; + goto yy1612; + } } } yy1620: yyaccept = 2; marker = ++p; yych = *p; - if (yych <= ')') { - if (yych <= '\n') { + if (yych <= '(') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1608; - if (yych <= '\t') goto yy1612; - goto yy1608; + if (yych == '\n') goto yy1608; + goto yy1612; } else { + if (yych <= '\r') goto yy1608; if (yych <= ' ') goto yy1612; if (yych <= '\'') goto yy1610; - if (yych <= '(') goto yy1612; - goto yy1605; + goto yy1612; } } else { if (yych <= '=') { + if (yych <= ')') goto yy1605; if (yych == '<') goto yy1598; goto yy1610; } else { @@ -13621,22 +13653,23 @@ int _scan_link_url(const unsigned char *p) yy1621: ++p; yych = *p; - if (yych <= '(') { + if (yych <= '\'') { if (yych <= '\n') { if (yych <= 0x00) goto yy1600; if (yych <= '\t') goto yy1612; goto yy1600; } else { + if (yych == '\r') goto yy1600; if (yych <= ' ') goto yy1612; - if (yych <= '\'') goto yy1610; - goto yy1612; + goto yy1610; } } else { - if (yych <= '>') { + if (yych <= '=') { + if (yych <= '(') goto yy1612; if (yych <= ')') goto yy1605; - if (yych <= '=') goto yy1610; - goto yy1620; + goto yy1610; } else { + if (yych <= '>') goto yy1620; if (yych == '\\') goto yy1619; goto yy1610; } @@ -13648,13 +13681,18 @@ int _scan_link_url(const unsigned char *p) if (yybm[0+yych] & 32) { goto yy1605; } - if (yych <= '\'') { - if (yych <= 0x00) goto yy1608; - if (yych == '\n') goto yy1608; - goto yy1612; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy1608; + goto yy1612; + } else { + if (yych <= '\n') goto yy1608; + if (yych <= '\f') goto yy1612; + goto yy1608; + } } else { if (yych <= ')') { - if (yych <= '(') goto yy1610; + if (yych == '(') goto yy1610; goto yy1612; } else { if (yych <= '=') goto yy1602; @@ -13666,22 +13704,23 @@ int _scan_link_url(const unsigned char *p) yyaccept = 0; marker = ++p; yych = *p; - if (yych <= '(') { + if (yych <= '\'') { if (yych <= '\n') { if (yych <= 0x00) goto yy1588; if (yych <= '\t') goto yy1612; goto yy1588; } else { + if (yych == '\r') goto yy1588; if (yych <= ' ') goto yy1612; - if (yych <= '\'') goto yy1605; - goto yy1610; + goto yy1605; } } else { - if (yych <= '>') { + if (yych <= '=') { + if (yych <= '(') goto yy1610; if (yych <= ')') goto yy1612; - if (yych <= '=') goto yy1605; - goto yy1622; + goto yy1605; } else { + if (yych <= '>') goto yy1622; if (yych == '\\') goto yy1609; goto yy1605; } @@ -13693,7 +13732,7 @@ int _scan_link_url(const unsigned char *p) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13779,13 +13818,13 @@ int _scan_link_title(const unsigned char *p) yy1633: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1626; } else { goto yy1637; } } else { - if (yyaccept == 2) { + if (yyaccept <= 2) { goto yy1644; } else { goto yy1651; @@ -13803,7 +13842,7 @@ int _scan_link_title(const unsigned char *p) yy1636: ++p; yy1637: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1638: yyaccept = 1; marker = ++p; @@ -13835,7 +13874,7 @@ int _scan_link_title(const unsigned char *p) yy1643: ++p; yy1644: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1645: yyaccept = 2; marker = ++p; @@ -13867,7 +13906,7 @@ int _scan_link_title(const unsigned char *p) yy1650: ++p; yy1651: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1652: yyaccept = 3; marker = ++p; @@ -13883,7 +13922,7 @@ int _scan_link_title(const unsigned char *p) } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ @@ -13934,7 +13973,7 @@ int _scan_spacechars(const unsigned char *p) goto yy1659; } yy1655: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1656: yych = *++p; goto yy1658; @@ -13954,7 +13993,7 @@ int _scan_spacechars(const unsigned char *p) } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14006,16 +14045,21 @@ int _scan_atx_header_start(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych == '#') goto yy1670; - goto yy1663; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1663; + } else { + if (yych <= '\r') goto yy1666; + if (yych == '#') goto yy1670; + goto yy1663; + } yy1665: yych = *++p; goto yy1663; yy1666: ++p; yy1667: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1668: ++p; yych = *p; @@ -14028,8 +14072,12 @@ int _scan_atx_header_start(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych == '#') goto yy1672; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + } else { + if (yych <= '\r') goto yy1666; + if (yych == '#') goto yy1672; + } yy1671: p = marker; goto yy1663; @@ -14038,33 +14086,49 @@ int _scan_atx_header_start(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych != '#') goto yy1671; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1671; + } else { + if (yych <= '\r') goto yy1666; + if (yych != '#') goto yy1671; + } yych = *++p; if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych != '#') goto yy1671; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1671; + } else { + if (yych <= '\r') goto yy1666; + if (yych != '#') goto yy1671; + } yych = *++p; if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych != '#') goto yy1671; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1671; + } else { + if (yych <= '\r') goto yy1666; + if (yych != '#') goto yy1671; + } ++p; if (yybm[0+(yych = *p)] & 128) { goto yy1668; } if (yych == '\n') goto yy1666; + if (yych == '\r') goto yy1666; goto yy1671; } } -// Match sexext header line. Return 1 for level-1 header, +// Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; @@ -14119,17 +14183,27 @@ int _scan_setext_header_line(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1693; } - if (yych == '\n') goto yy1691; - if (yych == ' ') goto yy1689; - goto yy1678; + if (yych <= '\f') { + if (yych == '\n') goto yy1691; + goto yy1678; + } else { + if (yych <= '\r') goto yy1691; + if (yych == ' ') goto yy1689; + goto yy1678; + } yy1680: yych = *(marker = ++p); if (yybm[0+yych] & 32) { goto yy1682; } - if (yych == '\n') goto yy1685; - if (yych == '-') goto yy1687; - goto yy1678; + if (yych <= '\f') { + if (yych == '\n') goto yy1685; + goto yy1678; + } else { + if (yych <= '\r') goto yy1685; + if (yych == '-') goto yy1687; + goto yy1678; + } yy1681: yych = *++p; goto yy1678; @@ -14140,6 +14214,7 @@ int _scan_setext_header_line(const unsigned char *p) goto yy1682; } if (yych == '\n') goto yy1685; + if (yych == '\r') goto yy1685; yy1684: p = marker; goto yy1678; @@ -14152,15 +14227,24 @@ int _scan_setext_header_line(const unsigned char *p) if (yybm[0+yych] & 32) { goto yy1682; } - if (yych == '\n') goto yy1685; - if (yych == '-') goto yy1687; - goto yy1684; + if (yych <= '\f') { + if (yych == '\n') goto yy1685; + goto yy1684; + } else { + if (yych <= '\r') goto yy1685; + if (yych == '-') goto yy1687; + goto yy1684; + } yy1689: ++p; yych = *p; - if (yych == '\n') goto yy1691; - if (yych == ' ') goto yy1689; - goto yy1684; + if (yych <= '\f') { + if (yych != '\n') goto yy1684; + } else { + if (yych <= '\r') goto yy1691; + if (yych == ' ') goto yy1689; + goto yy1684; + } yy1691: ++p; { return 1; } @@ -14170,9 +14254,14 @@ int _scan_setext_header_line(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1693; } - if (yych == '\n') goto yy1691; - if (yych == ' ') goto yy1689; - goto yy1684; + if (yych <= '\f') { + if (yych == '\n') goto yy1691; + goto yy1684; + } else { + if (yych <= '\r') goto yy1691; + if (yych == ' ') goto yy1689; + goto yy1684; + } } } @@ -14180,7 +14269,7 @@ int _scan_setext_header_line(const unsigned char *p) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14278,20 +14367,24 @@ int _scan_hrule(const unsigned char *p) if (yybm[0+yych] & 16) { goto yy1707; } - if (yych <= 0x08) goto yy1704; - if (yych <= '\t') goto yy1709; - if (yych <= '\n') goto yy1711; - goto yy1704; + if (yych <= '\n') { + if (yych <= 0x08) goto yy1704; + if (yych >= '\n') goto yy1711; + } else { + if (yych == '\r') goto yy1711; + goto yy1704; + } yy1709: ++p; yych = *p; if (yybm[0+yych] & 32) { goto yy1709; } - if (yych != '\n') goto yy1704; + if (yych == '\n') goto yy1711; + if (yych != '\r') goto yy1704; yy1711: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1713: ++p; yych = *p; @@ -14308,23 +14401,28 @@ int _scan_hrule(const unsigned char *p) if (yybm[0+yych] & 64) { goto yy1717; } - if (yych <= 0x08) goto yy1704; - if (yych <= '\t') goto yy1719; - if (yych <= '\n') goto yy1721; - goto yy1704; + if (yych <= '\n') { + if (yych <= 0x08) goto yy1704; + if (yych >= '\n') goto yy1721; + } else { + if (yych == '\r') goto yy1721; + goto yy1704; + } yy1719: ++p; yych = *p; - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1704; if (yych <= '\t') goto yy1719; + if (yych >= '\v') goto yy1704; } else { + if (yych <= '\r') goto yy1721; if (yych == ' ') goto yy1719; goto yy1704; } yy1721: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1723: ++p; yych = *p; @@ -14341,29 +14439,34 @@ int _scan_hrule(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1727; } - if (yych <= 0x08) goto yy1704; - if (yych <= '\t') goto yy1729; - if (yych <= '\n') goto yy1731; - goto yy1704; + if (yych <= '\n') { + if (yych <= 0x08) goto yy1704; + if (yych >= '\n') goto yy1731; + } else { + if (yych == '\r') goto yy1731; + goto yy1704; + } yy1729: ++p; yych = *p; - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1704; if (yych <= '\t') goto yy1729; + if (yych >= '\v') goto yy1704; } else { + if (yych <= '\r') goto yy1731; if (yych == ' ') goto yy1729; goto yy1704; } yy1731: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14372,7 +14475,7 @@ int _scan_open_code_fence(const unsigned char *p) unsigned char yych; static const unsigned char yybm[] = { 0, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 0, 160, 160, 160, 160, 160, + 160, 160, 0, 160, 160, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, @@ -14454,7 +14557,7 @@ int _scan_open_code_fence(const unsigned char *p) yy1745: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1747: yych = *++p; if (yybm[0+yych] & 64) { @@ -14482,13 +14585,13 @@ int _scan_open_code_fence(const unsigned char *p) yy1752: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14565,20 +14668,26 @@ int _scan_close_code_fence(const unsigned char *p) if (yybm[0+yych] & 64) { goto yy1764; } - if (yych == '\n') goto yy1766; - if (yych == '~') goto yy1762; - goto yy1761; + if (yych <= '\f') { + if (yych == '\n') goto yy1766; + goto yy1761; + } else { + if (yych <= '\r') goto yy1766; + if (yych == '~') goto yy1762; + goto yy1761; + } yy1764: ++p; yych = *p; if (yybm[0+yych] & 64) { goto yy1764; } - if (yych != '\n') goto yy1761; + if (yych == '\n') goto yy1766; + if (yych != '\r') goto yy1761; yy1766: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1768: yych = *++p; if (yybm[0+yych] & 128) { @@ -14592,33 +14701,38 @@ int _scan_close_code_fence(const unsigned char *p) if (yybm[0+yych] & 128) { goto yy1769; } - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1761; - if (yych >= '\n') goto yy1773; + if (yych <= '\t') goto yy1771; + if (yych <= '\n') goto yy1773; + goto yy1761; } else { + if (yych <= '\r') goto yy1773; if (yych != ' ') goto yy1761; } yy1771: ++p; yych = *p; - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1761; if (yych <= '\t') goto yy1771; + if (yych >= '\v') goto yy1761; } else { + if (yych <= '\r') goto yy1773; if (yych == ' ') goto yy1771; goto yy1761; } yy1773: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scans an entity. // Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14685,7 +14799,7 @@ int _scan_entity(const unsigned char *p) } yy1784: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1786: yych = *++p; if (yych <= ';') { diff --git a/scanners.h b/scanners.h index 1353f3b..bc5134e 100644 --- a/scanners.h +++ b/scanners.h @@ -5,21 +5,21 @@ extern "C" { #endif -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset); -int _scan_scheme(const unsigned char *p); -int _scan_autolink_uri(const unsigned char *p); -int _scan_autolink_email(const unsigned char *p); -int _scan_html_tag(const unsigned char *p); -int _scan_html_block_tag(const unsigned char *p); -int _scan_link_url(const unsigned char *p); -int _scan_link_title(const unsigned char *p); -int _scan_spacechars(const unsigned char *p); -int _scan_atx_header_start(const unsigned char *p); -int _scan_setext_header_line(const unsigned char *p); -int _scan_hrule(const unsigned char *p); -int _scan_open_code_fence(const unsigned char *p); -int _scan_close_code_fence(const unsigned char *p); -int _scan_entity(const unsigned char *p); +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset); +bufsize_t _scan_scheme(const unsigned char *p); +bufsize_t _scan_autolink_uri(const unsigned char *p); +bufsize_t _scan_autolink_email(const unsigned char *p); +bufsize_t _scan_html_tag(const unsigned char *p); +bufsize_t _scan_html_block_tag(const unsigned char *p); +bufsize_t _scan_link_url(const unsigned char *p); +bufsize_t _scan_link_title(const unsigned char *p); +bufsize_t _scan_spacechars(const unsigned char *p); +bufsize_t _scan_atx_header_start(const unsigned char *p); +bufsize_t _scan_setext_header_line(const unsigned char *p); +bufsize_t _scan_hrule(const unsigned char *p); +bufsize_t _scan_open_code_fence(const unsigned char *p); +bufsize_t _scan_close_code_fence(const unsigned char *p); +bufsize_t _scan_entity(const unsigned char *p); #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) diff --git a/scanners.re b/scanners.re index 31cdb4f..3722a99 100644 --- a/scanners.re +++ b/scanners.re @@ -2,9 +2,9 @@ #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -70,29 +70,29 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) */ // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:] { return (p - start); } + scheme [:] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:][^\x00-\x20<>]*[>] { return (p - start); } + scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -101,32 +101,32 @@ int _scan_autolink_email(const unsigned char *p) [@] [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)* - [>] { return (p - start); } + [>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - htmltag { return (p - start); } + htmltag { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML block tag including first <, // returning num of chars matched. -int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [<] [/] blocktagname (spacechar | [>]) { return (p - start); } - [<] blocktagname (spacechar | [/>]) { return (p - start); } - [<] [!?] { return (p - start); } + [<] [/] blocktagname (spacechar | [>]) { return (bufsize_t)(p - start); } + [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); } + [<] [!?] { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -135,13 +135,13 @@ int _scan_html_block_tag(const unsigned char *p) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } - [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } + [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); } + [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -149,47 +149,47 @@ int _scan_link_url(const unsigned char *p) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ["] (escaped_char|[^"\x00])* ["] { return (p - start); } - ['] (escaped_char|[^'\x00])* ['] { return (p - start); } - [(] (escaped_char|[^)\x00])* [)] { return (p - start); } + ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); } + ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); } + [(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ /*!re2c - [ \t\v\f\r\n]* { return (p - start); } + [ \t\v\f\r\n]* { return (bufsize_t)(p - start); } . { return 0; } */ } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [#]{1,6} ([ ]+|[\n]) { return (p - start); } + [#]{1,6} ([ ]+|[\r\n]) { return (bufsize_t)(p - start); } .? { return 0; } */ } -// Match sexext header line. Return 1 for level-1 header, +// Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c - [=]+ [ ]* [\n] { return 1; } - [-]+ [ ]* [\n] { return 2; } + [=]+ [ ]* [\r\n] { return 1; } + [-]+ [ ]* [\r\n] { return 2; } .? { return 0; } */ } @@ -197,51 +197,51 @@ int _scan_setext_header_line(const unsigned char *p) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } - ([_][ ]*){3,} [ \t]* [\n] { return (p - start); } - ([-][ ]*){3,} [ \t]* [\n] { return (p - start); } + ([*][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([_][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([-][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } - [~]{3,} / [^~\n\x00]*[\n] { return (p - start); } + [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [ \t]*[\n] { return (p - start); } - [~]{3,} / [ \t]*[\n] { return (p - start); } + [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scans an entity. // Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] - { return (p - start); } + { return (bufsize_t)(p - start); } .? { return 0; } */ } diff --git a/utf8.c b/utf8.c index b83c2a5..ba1d873 100644 --- a/utf8.c +++ b/utf8.c @@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf) cmark_strbuf_put(buf, repl, 3); } -static int utf8proc_charlen(const uint8_t *str, int str_len) +static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) { int length, i; @@ -42,7 +42,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) if (!length) return -1; - if (str_len >= 0 && length > str_len) + if (str_len >= 0 && (bufsize_t)length > str_len) return -str_len; for (i = 1; i < length; i++) { @@ -54,7 +54,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) } // Validate a single UTF-8 character according to RFC 3629. -static int utf8proc_valid(const uint8_t *str, int str_len) +static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) { int length = utf8proc_charlen(str, str_len); @@ -109,14 +109,14 @@ static int utf8proc_valid(const uint8_t *str, int str_len) return length; } -void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) +void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) { static const uint8_t whitespace[] = " "; - size_t i = 0, tab = 0; + bufsize_t i = 0, tab = 0; while (i < size) { - size_t org = i; + bufsize_t org = i; while (i < size && line[i] != '\t' && line[i] != '\0' && line[i] < 0x80) { @@ -151,7 +151,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) } } -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst) { int length; int32_t uc = -1; @@ -191,7 +191,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) { uint8_t dst[4]; - int len = 0; + bufsize_t len = 0; assert(uc >= 0); @@ -227,7 +227,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) cmark_strbuf_put(buf, dst, len); } -void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len) { int32_t c; @@ -235,7 +235,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) utf8proc_encode_char(x, dest) while (len > 0) { - int char_len = utf8proc_iterate(str, len, &c); + bufsize_t char_len = utf8proc_iterate(str, len, &c); if (char_len >= 0) { #include "case_fold_switch.inc" diff --git a/utf8.h b/utf8.h index 7df1573..ed1d7ee 100644 --- a/utf8.h +++ b/utf8.h @@ -8,10 +8,10 @@ extern "C" { #endif -void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len); +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst); -void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size); +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); +void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); int utf8proc_is_space(int32_t uc); int utf8proc_is_punctuation(int32_t uc); diff --git a/xml.c b/xml.c index 845e553..7eec5a6 100644 --- a/xml.c +++ b/xml.c @@ -11,14 +11,9 @@ // Functions to convert cmark_nodes to XML strings. -static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { - if (source != NULL) { - if (length < 0) - length = strlen((char *)source); - - houdini_escape_html0(dest, source, (size_t)length, 0); - } + houdini_escape_html0(dest, source, length, 0); } struct render_state { @@ -118,10 +113,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: cmark_strbuf_puts(xml, " destination=\""); - escape_xml(xml, node->as.link.url, -1); + escape_xml(xml, node->as.link.url.data, + node->as.link.url.len); cmark_strbuf_putc(xml, '"'); cmark_strbuf_puts(xml, " title=\""); - escape_xml(xml, node->as.link.title, -1); + escape_xml(xml, node->as.link.title.data, + node->as.link.title.len); cmark_strbuf_putc(xml, '"'); break; default: