Skip to content

Commit

Permalink
fix!: drasticly improve escaping !, #, and |
Browse files Browse the repository at this point in the history
Raises spec tests from 578 to 580. It handles cases that look like these

    Link, not image: \![a](b)

    This header ends with hashes, not an ATX trailer ###
    ====================================================

    | a \| b | a \| c |
    |--------|--------|
  • Loading branch information
notriddle committed Oct 17, 2024
1 parent 6c8bc4d commit 76c24a1
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 20 deletions.
14 changes: 12 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ pub struct State<'a> {
pub last_was_text_without_trailing_newline: bool,
/// True if the last event was a paragraph start. Used to escape spaces at start of line (prevent spurrious indented code).
pub last_was_paragraph_start: bool,
/// True if the next event is a link, image, or footnote.
pub next_is_link_like: bool,
/// Currently open links
pub link_stack: Vec<LinkCategory<'a>>,
/// Currently open images
Expand Down Expand Up @@ -254,7 +256,15 @@ where
F: fmt::Write,
{
let mut state = state.unwrap_or_default();
for event in events {
let mut events = events.peekable();
while let Some(event) = events.next() {
state.next_is_link_like = matches!(
events.peek().map(Borrow::borrow),
Some(
Event::Start(Tag::Link { .. } | Tag::Image { .. } | Tag::FootnoteDefinition(..))
| Event::FootnoteReference(..)
)
);
cmark_resume_one_event(event, &mut formatter, &mut state, &options)?;
}
Ok(state)
Expand Down Expand Up @@ -776,7 +786,7 @@ where
}
state.last_was_text_without_trailing_newline = !text.ends_with('\n');
print_text_without_trailing_newline(
&escape_leading_special_characters(text, state.is_in_code_block(), options),
&escape_special_characters(text, &state, options),
formatter,
&state.padding,
)
Expand Down
3 changes: 3 additions & 0 deletions src/source_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ where
for (event, range) in event_and_ranges {
let update_event_end_index = !matches!(*event.borrow(), Event::Start(_));
let prevent_escape_leading_special_characters = match (&range, event.borrow()) {
// Headers and tables can have special characters that aren't at the start
// of the line, because headers end with `#` and tables have pipes in the middle.
_ if state.current_heading.is_some() || !state.table_alignments.is_empty() => false,
// IMPORTANT: Any changes that allow anything other than `Text`
// breaks the assumption below.
(Some(range), Event::Text(_)) => {
Expand Down
24 changes: 14 additions & 10 deletions src/text_modifications.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,25 @@ where
Ok(())
}

pub fn escape_leading_special_characters<'a>(
t: &'a str,
is_in_code_block: bool,
options: &Options<'a>,
) -> Cow<'a, str> {
if is_in_code_block || t.is_empty() {
pub fn escape_special_characters<'a>(t: &'a str, state: &State<'a>, options: &Options<'a>) -> Cow<'a, str> {
if state.is_in_code_block() || t.is_empty() {
return Cow::Borrowed(t);
}

let first = t.chars().next().expect("at least one char");
if options.special_characters().contains(first) {
let first_special = options.special_characters().contains(first);
let ends_with_special =
(state.next_is_link_like && t.ends_with("!")) || (state.current_heading.is_some() && t.ends_with("#"));
let table_contains_pipe = !state.table_alignments.is_empty() && t.contains("|");
if first_special || ends_with_special || table_contains_pipe {
let mut s = String::with_capacity(t.len() + 1);
s.push('\\');
s.push(first);
s.push_str(&t[1..]);
for (i, c) in t.char_indices() {
if (i == 0 && first_special) || (i == t.len() - 1 && ends_with_special) || (c == '|' && table_contains_pipe)
{
s.push('\\');
}
s.push(c);
}
Cow::Owned(s)
} else {
Cow::Borrowed(t)
Expand Down
14 changes: 13 additions & 1 deletion tests/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ mod start {
Tag::*,
};

use super::s;
use super::{es, s};

#[test]
fn paragraph() {
Expand Down Expand Up @@ -178,6 +178,18 @@ mod start {
fn table_cell() {
assert_eq!(s(Start(TableCell)), "|");
}
#[test]
fn table_pipe() {
assert_eq!(
es([
Start(Table(vec![Left, Center, Right, Alignment::None])),
Start(TableHead),
Start(TableCell),
Text("a|b".into()),
]),
r"|a\|b"
);
}

#[test]
fn definition_list_definition() {
Expand Down
42 changes: 39 additions & 3 deletions tests/fmt.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use pulldown_cmark::{Alignment, CodeBlockKind, Event, LinkType, Options, Parser, Tag, TagEnd};
use pulldown_cmark::{utils::TextMergeStream, Alignment, CodeBlockKind, Event, LinkType, Options, Parser, Tag, TagEnd};
use pulldown_cmark_to_cmark::{cmark, cmark_resume, cmark_resume_with_options, Options as CmarkToCmarkOptions, State};

mod source_range_fmt;
Expand Down Expand Up @@ -60,8 +60,8 @@ fn assert_events_eq(s: &str) {
let mut buf = String::new();
cmark(before_events, &mut buf).unwrap();

let before_events = Parser::new_ext(s, Options::all());
let after_events = Parser::new_ext(&buf, Options::all());
let before_events = TextMergeStream::new(Parser::new_ext(s, Options::all()));
let after_events = TextMergeStream::new(Parser::new_ext(&buf, Options::all()));
println!("{buf}");
assert_eq!(before_events.collect::<Vec<_>>(), after_events.collect::<Vec<_>>());
}
Expand Down Expand Up @@ -1010,6 +1010,36 @@ mod table {
let p = Parser::new_ext(&generated_markdown, Options::all());
let generated_events: Vec<_> = p.into_iter().collect();

assert_eq!(original_events, generated_events);
}
#[test]
fn table_with_pipe_in_column() {
use pulldown_cmark::{Options, Parser};

let original_table_markdown = indoc!(
r"
| \| | a\|b |
|----|------|
| \| | a\|b |"
);
let p = Parser::new_ext(original_table_markdown, Options::all());
let original_events: Vec<_> = p.into_iter().collect();

let (generated_markdown, _) = fmte(&original_events);

assert_eq!(
generated_markdown,
indoc!(
r"
|\||a\|b|
|-|---|
|\||a\|b|"
)
);

let p = Parser::new_ext(&generated_markdown, Options::all());
let generated_events: Vec<_> = p.into_iter().collect();

assert_eq!(original_events, generated_events);
}
}
Expand Down Expand Up @@ -1452,6 +1482,12 @@ mod heading {
assert_events_eq_both("# Heading { #id .class1 key1=val1 .class2 }");
assert_events_eq_both("# Heading { #id .class1 .class2 key1=val1 key2 }");
}
#[test]
fn heading_with_hashes_at_end() {
assert_events_eq_both("Heading #\n====");
assert_events_eq_both("Heading \\#\n====");
assert_events_eq_both("# Heading \\#");
}
}

mod frontmatter {
Expand Down
6 changes: 3 additions & 3 deletions tests/source_range_fmt.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copied from `fmt.rs`.

use pulldown_cmark::{Options, Parser};
use pulldown_cmark::{utils::TextMergeStream, Options, Parser};
use pulldown_cmark_to_cmark::{
cmark_resume_with_source_range_and_options, cmark_with_source_range, Options as CmarkToCmarkOptions, State,
};
Expand Down Expand Up @@ -50,7 +50,7 @@ pub fn assert_events_eq(s: &str) {
)
.unwrap();

let before_events = Parser::new_ext(s, Options::all());
let after_events = Parser::new_ext(&buf, Options::all());
let before_events = TextMergeStream::new(Parser::new_ext(s, Options::all()));
let after_events = TextMergeStream::new(Parser::new_ext(&buf, Options::all()));
assert_eq!(before_events.collect::<Vec<_>>(), after_events.collect::<Vec<_>>());
}
5 changes: 4 additions & 1 deletion tests/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,12 @@ fn collect_test_case<'a>(events: &mut impl Iterator<Item = Event<'a>>) -> Option

fn test_roundtrip(original: &str, expected: &str) -> bool {
let opts = Options::empty();
let event_list = TextMergeStream::new(Parser::new_ext(original, opts)).collect::<Vec<_>>();
let event_list = Parser::new_ext(original, opts).collect::<Vec<_>>();
let mut regen_str = String::new();
cmark(event_list.iter().cloned(), &mut regen_str).expect("Regeneration failure");
// text events should be merged before comparing two event lists for equivalence.
// you don't need to merge them before feeding them into `cmark`.
let event_list: Vec<Event<'_>> = TextMergeStream::new(event_list.into_iter()).collect();
let event_list_2 = TextMergeStream::new(Parser::new_ext(&regen_str, opts)).collect::<Vec<_>>();
let event_count = event_list.len();
let event_count_2 = event_list_2.len();
Expand Down

0 comments on commit 76c24a1

Please sign in to comment.