From 6f47a858f27a9e9546eaf9368b2a4fba3a4d74b8 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 18 Sep 2023 22:09:51 +0200 Subject: [PATCH 1/5] Use old parser if `custom_code_classes_in_docs` feature is not enabled --- src/librustdoc/html/markdown.rs | 239 ++++++++++++++++++-------------- 1 file changed, 135 insertions(+), 104 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 59958fbaef918..7528393f0f8cd 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -868,7 +868,7 @@ impl<'tcx> ExtraInfo<'tcx> { #[derive(Eq, PartialEq, Clone, Debug)] pub(crate) struct LangString { - original: String, + pub(crate) original: String, pub(crate) should_panic: bool, pub(crate) no_run: bool, pub(crate) ignore: Ignore, @@ -1158,6 +1158,29 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> { } } +fn tokens(string: &str) -> impl Iterator> { + // Pandoc, which Rust once used for generating documentation, + // expects lang strings to be surrounded by `{}` and for each token + // to be proceeded by a `.`. Since some of these lang strings are still + // loose in the wild, we strip a pair of surrounding `{}` from the lang + // string and a leading `.` from each token. + + let string = string.trim(); + + let first = string.chars().next(); + let last = string.chars().last(); + + let string = + if first == Some('{') && last == Some('}') { &string[1..string.len() - 1] } else { string }; + + string + .split(|c| c == ',' || c == ' ' || c == '\t') + .map(str::trim) + .map(|token| token.strip_prefix('.').unwrap_or(token)) + .filter(|token| !token.is_empty()) + .map(|token| LangStringToken::LangToken(token)) +} + impl Default for LangString { fn default() -> Self { Self { @@ -1208,122 +1231,130 @@ impl LangString { data.original = string.to_owned(); - for token in TagIterator::new(string, extra) { - match token { - LangStringToken::LangToken("should_panic") => { - data.should_panic = true; - seen_rust_tags = !seen_other_tags; - } - LangStringToken::LangToken("no_run") => { - data.no_run = true; - seen_rust_tags = !seen_other_tags; - } - LangStringToken::LangToken("ignore") => { - data.ignore = Ignore::All; - seen_rust_tags = !seen_other_tags; - } - LangStringToken::LangToken(x) if x.starts_with("ignore-") => { - if enable_per_target_ignores { - ignores.push(x.trim_start_matches("ignore-").to_owned()); + let mut call = |tokens: &mut dyn Iterator>| { + for token in tokens { + match token { + LangStringToken::LangToken("should_panic") => { + data.should_panic = true; seen_rust_tags = !seen_other_tags; } - } - LangStringToken::LangToken("rust") => { - data.rust = true; - seen_rust_tags = true; - } - LangStringToken::LangToken("custom") => { - if custom_code_classes_in_docs { - seen_custom_tag = true; - } else { - seen_other_tags = true; + LangStringToken::LangToken("no_run") => { + data.no_run = true; + seen_rust_tags = !seen_other_tags; } - } - LangStringToken::LangToken("test_harness") => { - data.test_harness = true; - seen_rust_tags = !seen_other_tags || seen_rust_tags; - } - LangStringToken::LangToken("compile_fail") => { - data.compile_fail = true; - seen_rust_tags = !seen_other_tags || seen_rust_tags; - data.no_run = true; - } - LangStringToken::LangToken(x) if x.starts_with("edition") => { - data.edition = x[7..].parse::().ok(); - } - LangStringToken::LangToken(x) - if allow_error_code_check && x.starts_with('E') && x.len() == 5 => - { - if x[1..].parse::().is_ok() { - data.error_codes.push(x.to_owned()); + LangStringToken::LangToken("ignore") => { + data.ignore = Ignore::All; + seen_rust_tags = !seen_other_tags; + } + LangStringToken::LangToken(x) if x.starts_with("ignore-") => { + if enable_per_target_ignores { + ignores.push(x.trim_start_matches("ignore-").to_owned()); + seen_rust_tags = !seen_other_tags; + } + } + LangStringToken::LangToken("rust") => { + data.rust = true; + seen_rust_tags = true; + } + LangStringToken::LangToken("custom") => { + if custom_code_classes_in_docs { + seen_custom_tag = true; + } else { + seen_other_tags = true; + } + } + LangStringToken::LangToken("test_harness") => { + data.test_harness = true; seen_rust_tags = !seen_other_tags || seen_rust_tags; - } else { - seen_other_tags = true; } - } - LangStringToken::LangToken(x) if extra.is_some() => { - let s = x.to_lowercase(); - if let Some((flag, help)) = if s == "compile-fail" - || s == "compile_fail" - || s == "compilefail" + LangStringToken::LangToken("compile_fail") => { + data.compile_fail = true; + seen_rust_tags = !seen_other_tags || seen_rust_tags; + data.no_run = true; + } + LangStringToken::LangToken(x) if x.starts_with("edition") => { + data.edition = x[7..].parse::().ok(); + } + LangStringToken::LangToken(x) + if allow_error_code_check && x.starts_with('E') && x.len() == 5 => { - Some(( - "compile_fail", - "the code block will either not be tested if not marked as a rust one \ - or won't fail if it compiles successfully", - )) - } else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" { - Some(( - "should_panic", - "the code block will either not be tested if not marked as a rust one \ - or won't fail if it doesn't panic when running", - )) - } else if s == "no-run" || s == "no_run" || s == "norun" { - Some(( - "no_run", - "the code block will either not be tested if not marked as a rust one \ - or will be run (which you might not want)", - )) - } else if s == "test-harness" || s == "test_harness" || s == "testharness" { - Some(( - "test_harness", - "the code block will either not be tested if not marked as a rust one \ - or the code will be wrapped inside a main function", - )) - } else { - None - } { - if let Some(extra) = extra { - extra.error_invalid_codeblock_attr_with_help( - format!("unknown attribute `{x}`. Did you mean `{flag}`?"), - help, - ); + if x[1..].parse::().is_ok() { + data.error_codes.push(x.to_owned()); + seen_rust_tags = !seen_other_tags || seen_rust_tags; + } else { + seen_other_tags = true; } } - seen_other_tags = true; - data.unknown.push(x.to_owned()); - } - LangStringToken::LangToken(x) => { - seen_other_tags = true; - data.unknown.push(x.to_owned()); - } - LangStringToken::KeyValueAttribute(key, value) => { - if custom_code_classes_in_docs { - if key == "class" { - data.added_classes.push(value.to_owned()); - } else if let Some(extra) = extra { - extra.error_invalid_codeblock_attr(format!( - "unsupported attribute `{key}`" - )); + LangStringToken::LangToken(x) if extra.is_some() => { + let s = x.to_lowercase(); + if let Some((flag, help)) = if s == "compile-fail" + || s == "compile_fail" + || s == "compilefail" + { + Some(( + "compile_fail", + "the code block will either not be tested if not marked as a rust one \ + or won't fail if it compiles successfully", + )) + } else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" { + Some(( + "should_panic", + "the code block will either not be tested if not marked as a rust one \ + or won't fail if it doesn't panic when running", + )) + } else if s == "no-run" || s == "no_run" || s == "norun" { + Some(( + "no_run", + "the code block will either not be tested if not marked as a rust one \ + or will be run (which you might not want)", + )) + } else if s == "test-harness" || s == "test_harness" || s == "testharness" { + Some(( + "test_harness", + "the code block will either not be tested if not marked as a rust one \ + or the code will be wrapped inside a main function", + )) + } else { + None + } { + if let Some(extra) = extra { + extra.error_invalid_codeblock_attr_with_help( + format!("unknown attribute `{x}`. Did you mean `{flag}`?"), + help, + ); + } } - } else { seen_other_tags = true; + data.unknown.push(x.to_owned()); + } + LangStringToken::LangToken(x) => { + seen_other_tags = true; + data.unknown.push(x.to_owned()); + } + LangStringToken::KeyValueAttribute(key, value) => { + if custom_code_classes_in_docs { + if key == "class" { + data.added_classes.push(value.to_owned()); + } else if let Some(extra) = extra { + extra.error_invalid_codeblock_attr(format!( + "unsupported attribute `{key}`" + )); + } + } else { + seen_other_tags = true; + } + } + LangStringToken::ClassAttribute(class) => { + data.added_classes.push(class.to_owned()); } - } - LangStringToken::ClassAttribute(class) => { - data.added_classes.push(class.to_owned()); } } + }; + + if custom_code_classes_in_docs { + call(&mut TagIterator::new(string, extra).into_iter()) + } else { + call(&mut tokens(string)) } // ignore-foo overrides ignore From 494fdcd8ecf0fea910fdc17c84dbd925e80e2b75 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 18 Sep 2023 22:11:03 +0200 Subject: [PATCH 2/5] Add new rustdoc-ui test for `custom_code_classes_in_docs` feature --- .../feature-gate-custom_code_classes_in_docs.rs | 8 ++++++++ .../feature-gate-custom_code_classes_in_docs.stderr | 13 ++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs index 3f0f8b5b9f997..4fcced86a3ba6 100644 --- a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs +++ b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs @@ -8,3 +8,11 @@ //~| NOTE see issue #79483 //~| HELP add `#![feature(custom_code_classes_in_docs)]` to the crate attributes to enable pub struct Bar; + +/// ```ASN.1 +/// int main(void) { return 0; } +/// ``` +//~^^^ WARNING custom classes in code blocks will change behaviour +//~| NOTE see issue #79483 +//~| HELP add `#![feature(custom_code_classes_in_docs)]` to the crate attributes to enable +pub struct Bar2; diff --git a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr index 1a2360d9b3004..c6db54b31284d 100644 --- a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr +++ b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr @@ -10,5 +10,16 @@ LL | | /// ``` = help: add `#![feature(custom_code_classes_in_docs)]` to the crate attributes to enable = note: found these custom classes: class=language-c -warning: 1 warning emitted +warning: custom classes in code blocks will change behaviour + --> $DIR/feature-gate-custom_code_classes_in_docs.rs:12:1 + | +LL | / /// ```ASN.1 +LL | | /// int main(void) { return 0; } +LL | | /// ``` + | |_______^ + | + = note: see issue #79483 for more information + = help: add `#![feature(custom_code_classes_in_docs)]` to the crate attributes to enable + +warning: 2 warnings emitted From 88b070aa925be19937e59ea576dd25e47b2c26f7 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 19 Sep 2023 13:20:18 +0200 Subject: [PATCH 3/5] Return early in `check_custom_code_classes` check if the feature is enabled --- src/librustdoc/passes/check_custom_code_classes.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/librustdoc/passes/check_custom_code_classes.rs b/src/librustdoc/passes/check_custom_code_classes.rs index 1a703a4e96755..6266d3ff51d64 100644 --- a/src/librustdoc/passes/check_custom_code_classes.rs +++ b/src/librustdoc/passes/check_custom_code_classes.rs @@ -21,6 +21,10 @@ pub(crate) const CHECK_CUSTOM_CODE_CLASSES: Pass = Pass { }; pub(crate) fn check_custom_code_classes(krate: Crate, cx: &mut DocContext<'_>) -> Crate { + if cx.tcx.features().custom_code_classes_in_docs { + // Nothing to check here if the feature is enabled. + return krate; + } let mut coll = CustomCodeClassLinter { cx }; coll.fold_crate(krate) @@ -59,7 +63,7 @@ pub(crate) fn look_for_custom_classes<'tcx>(cx: &DocContext<'tcx>, item: &Item) let dox = item.attrs.doc_value(); find_codes(&dox, &mut tests, ErrorCodes::No, false, None, true, true); - if !tests.custom_classes_found.is_empty() && !cx.tcx.features().custom_code_classes_in_docs { + if !tests.custom_classes_found.is_empty() { let span = item.attr_span(cx.tcx); let sess = &cx.tcx.sess.parse_sess; let mut err = sess From 6bed1c60619b23ccb3011282fa1eed200f71ee0a Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 19 Sep 2023 17:29:30 +0200 Subject: [PATCH 4/5] Allow more characters in custom classes --- src/librustdoc/html/markdown.rs | 36 ++++++++++++++++++++------- src/librustdoc/html/markdown/tests.rs | 21 +++++++++++++--- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 7528393f0f8cd..6dbf2185e3d03 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -893,11 +893,13 @@ pub(crate) enum Ignore { /// ```eBNF /// lang-string = *(token-list / delimited-attribute-list / comment) /// -/// bareword = CHAR *(CHAR) +/// bareword = LEADINGCHAR *(CHAR) +/// bareword-without-leading-char = CHAR *(CHAR) /// quoted-string = QUOTE *(NONQUOTE) QUOTE /// token = bareword / quoted-string +/// token-without-leading-char = bareword-without-leading-char / quoted-string /// sep = COMMA/WS *(COMMA/WS) -/// attribute = (DOT token)/(token EQUAL token) +/// attribute = (DOT token)/(token EQUAL token-without-leading-char) /// attribute-list = [sep] attribute *(sep attribute) [sep] /// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET /// token-list = [sep] token *(sep token) [sep] @@ -907,8 +909,15 @@ pub(crate) enum Ignore { /// CLOSE_PARENT = ")" /// OPEN-CURLY-BRACKET = "{" /// CLOSE-CURLY-BRACKET = "}" -/// CHAR = ALPHA / DIGIT / "_" / "-" / ":" -/// QUOTE = %x22 +/// LEADINGCHAR = ALPHA | DIGIT | "_" | "-" | ":" +/// ; All ASCII punctuation except comma, quote, equals, backslash, grave (backquote) and braces. +/// ; Comma is used to separate language tokens, so it can't be used in one. +/// ; Quote is used to allow otherwise-disallowed characters in language tokens. +/// ; Equals is used to make key=value pairs in attribute blocks. +/// ; Backslash and grave are special Markdown characters. +/// ; Braces are used to start an attribute block. +/// CHAR = ALPHA | DIGIT | "_" | "-" | ":" | "." | "!" | "#" | "$" | "%" | "&" | "*" | "+" | "/" | +/// ";" | "<" | ">" | "?" | "@" | "^" | "|" | "~" /// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"` /// COMMA = "," /// DOT = "." @@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> { KeyValueAttribute(&'a str, &'a str), } -fn is_bareword_char(c: char) -> bool { +fn is_leading_char(c: char) -> bool { c == '_' || c == '-' || c == ':' || c.is_ascii_alphabetic() || c.is_ascii_digit() } +fn is_bareword_char(c: char) -> bool { + is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c) +} fn is_separator(c: char) -> bool { c == ' ' || c == ',' || c == '\t' } @@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> { return self.next(); } else if c == '.' { return self.parse_class(pos); - } else if c == '"' || is_bareword_char(c) { + } else if c == '"' || is_leading_char(c) { return self.parse_key_value(c, pos); } else { self.emit_error(format!("unexpected character `{c}`")); @@ -1107,7 +1119,11 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> { return None; } let indices = self.parse_string(pos)?; - if let Some((_, c)) = self.inner.peek().copied() && c != '{' && !is_separator(c) && c != '(' { + if let Some((_, c)) = self.inner.peek().copied() && + c != '{' && + !is_separator(c) && + c != '(' + { self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`")); return None; } @@ -1115,8 +1131,6 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> { } else if c == '{' { self.is_in_attribute_block = true; return self.next(); - } else if is_bareword_char(c) { - continue; } else if is_separator(c) { if pos != start { return Some(LangStringToken::LangToken(&self.data[start..pos])); @@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> { return Some(LangStringToken::LangToken(&self.data[start..pos])); } return self.next(); + } else if pos == start && is_leading_char(c) { + continue; + } else if pos != start && is_bareword_char(c) { + continue; } else { self.emit_error(format!("unexpected character `{c}`")); return None; diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs index 32957ac57fae9..5eba1d0609f38 100644 --- a/src/librustdoc/html/markdown/tests.rs +++ b/src/librustdoc/html/markdown/tests.rs @@ -226,13 +226,28 @@ fn test_lang_string_parse() { ..Default::default() }); // error - t(LangString { original: "{.first.second}".into(), rust: true, ..Default::default() }); + t(LangString { + original: "{.first.second}".into(), + rust: true, + added_classes: vec!["first.second".into()], + ..Default::default() + }); // error t(LangString { original: "{class=first=second}".into(), rust: true, ..Default::default() }); // error - t(LangString { original: "{class=first.second}".into(), rust: true, ..Default::default() }); + t(LangString { + original: "{class=first.second}".into(), + rust: true, + added_classes: vec!["first.second".into()], + ..Default::default() + }); // error - t(LangString { original: "{class=.first}".into(), rust: true, ..Default::default() }); + t(LangString { + original: "{class=.first}".into(), + added_classes: vec![".first".into()], + rust: true, + ..Default::default() + }); t(LangString { original: r#"{class="first"}"#.into(), added_classes: vec!["first".into()], From 295ec09b63b1c8352d4a75895eb5d8bd85975b16 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 19 Sep 2023 17:29:39 +0200 Subject: [PATCH 5/5] Update tests for custom classes --- .../custom_code_classes_in_docs-warning.rs | 8 +++--- ...custom_code_classes_in_docs-warning.stderr | 26 ++++--------------- ...eature-gate-custom_code_classes_in_docs.rs | 3 --- ...re-gate-custom_code_classes_in_docs.stderr | 13 +--------- 4 files changed, 9 insertions(+), 41 deletions(-) diff --git a/tests/rustdoc-ui/custom_code_classes_in_docs-warning.rs b/tests/rustdoc-ui/custom_code_classes_in_docs-warning.rs index dd8759b7e378a..5398d5833c75e 100644 --- a/tests/rustdoc-ui/custom_code_classes_in_docs-warning.rs +++ b/tests/rustdoc-ui/custom_code_classes_in_docs-warning.rs @@ -57,25 +57,23 @@ pub fn foo8() {} /// ```{class=one=two} /// main; /// ``` -//~^^^ ERROR unexpected `=` +//~^^^ ERROR unexpected `=` character pub fn foo9() {} /// ```{.one.two} /// main; /// ``` -//~^^^ ERROR unexpected `.` character pub fn foo10() {} -/// ```{class=.one} +/// ```{class=(one} /// main; /// ``` -//~^^^ ERROR unexpected `.` character after `=` +//~^^^ ERROR unexpected `(` character after `=` pub fn foo11() {} /// ```{class=one.two} /// main; /// ``` -//~^^^ ERROR unexpected `.` character pub fn foo12() {} /// ```{(comment)} diff --git a/tests/rustdoc-ui/custom_code_classes_in_docs-warning.stderr b/tests/rustdoc-ui/custom_code_classes_in_docs-warning.stderr index 3e0dc4b2f7a6b..14b4b3bab3fad 100644 --- a/tests/rustdoc-ui/custom_code_classes_in_docs-warning.stderr +++ b/tests/rustdoc-ui/custom_code_classes_in_docs-warning.stderr @@ -77,37 +77,21 @@ LL | | /// main; LL | | /// ``` | |_______^ -error: unexpected `.` character - --> $DIR/custom_code_classes_in_docs-warning.rs:63:1 +error: unexpected `(` character after `=` + --> $DIR/custom_code_classes_in_docs-warning.rs:68:1 | -LL | / /// ```{.one.two} -LL | | /// main; -LL | | /// ``` - | |_______^ - -error: unexpected `.` character after `=` - --> $DIR/custom_code_classes_in_docs-warning.rs:69:1 - | -LL | / /// ```{class=.one} -LL | | /// main; -LL | | /// ``` - | |_______^ - -error: unexpected `.` character - --> $DIR/custom_code_classes_in_docs-warning.rs:75:1 - | -LL | / /// ```{class=one.two} +LL | / /// ```{class=(one} LL | | /// main; LL | | /// ``` | |_______^ error: unexpected character `(` - --> $DIR/custom_code_classes_in_docs-warning.rs:81:1 + --> $DIR/custom_code_classes_in_docs-warning.rs:79:1 | LL | / /// ```{(comment)} LL | | /// main; LL | | /// ``` | |_______^ -error: aborting due to 13 previous errors +error: aborting due to 11 previous errors diff --git a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs index 4fcced86a3ba6..99263a944f810 100644 --- a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs +++ b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.rs @@ -12,7 +12,4 @@ pub struct Bar; /// ```ASN.1 /// int main(void) { return 0; } /// ``` -//~^^^ WARNING custom classes in code blocks will change behaviour -//~| NOTE see issue #79483 -//~| HELP add `#![feature(custom_code_classes_in_docs)]` to the crate attributes to enable pub struct Bar2; diff --git a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr index c6db54b31284d..1a2360d9b3004 100644 --- a/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr +++ b/tests/rustdoc-ui/feature-gate-custom_code_classes_in_docs.stderr @@ -10,16 +10,5 @@ LL | | /// ``` = help: add `#![feature(custom_code_classes_in_docs)]` to the crate attributes to enable = note: found these custom classes: class=language-c -warning: custom classes in code blocks will change behaviour - --> $DIR/feature-gate-custom_code_classes_in_docs.rs:12:1 - | -LL | / /// ```ASN.1 -LL | | /// int main(void) { return 0; } -LL | | /// ``` - | |_______^ - | - = note: see issue #79483 for more information - = help: add `#![feature(custom_code_classes_in_docs)]` to the crate attributes to enable - -warning: 2 warnings emitted +warning: 1 warning emitted