-
Notifications
You must be signed in to change notification settings - Fork 13k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of #77119 - GuillaumeGomez:unclosed-html-tag-lint, r=jyn514
Unclosed html tag lint Part of #67799. I think `@ollie27` will be interested (`@Manishearth` too since they opened the issue ;) ). r? `@jyn514`
- Loading branch information
Showing
11 changed files
with
428 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
use super::{span_of_attrs, Pass}; | ||
use crate::clean::*; | ||
use crate::core::DocContext; | ||
use crate::fold::DocFolder; | ||
use crate::html::markdown::opts; | ||
use core::ops::Range; | ||
use pulldown_cmark::{Event, Parser}; | ||
use rustc_feature::UnstableFeatures; | ||
use rustc_session::lint; | ||
|
||
pub const CHECK_INVALID_HTML_TAGS: Pass = Pass { | ||
name: "check-invalid-html-tags", | ||
run: check_invalid_html_tags, | ||
description: "detects invalid HTML tags in doc comments", | ||
}; | ||
|
||
struct InvalidHtmlTagsLinter<'a, 'tcx> { | ||
cx: &'a DocContext<'tcx>, | ||
} | ||
|
||
impl<'a, 'tcx> InvalidHtmlTagsLinter<'a, 'tcx> { | ||
fn new(cx: &'a DocContext<'tcx>) -> Self { | ||
InvalidHtmlTagsLinter { cx } | ||
} | ||
} | ||
|
||
pub fn check_invalid_html_tags(krate: Crate, cx: &DocContext<'_>) -> Crate { | ||
if !UnstableFeatures::from_environment().is_nightly_build() { | ||
krate | ||
} else { | ||
let mut coll = InvalidHtmlTagsLinter::new(cx); | ||
|
||
coll.fold_crate(krate) | ||
} | ||
} | ||
|
||
const ALLOWED_UNCLOSED: &[&str] = &[ | ||
"area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", | ||
"source", "track", "wbr", | ||
]; | ||
|
||
fn drop_tag( | ||
tags: &mut Vec<(String, Range<usize>)>, | ||
tag_name: String, | ||
range: Range<usize>, | ||
f: &impl Fn(&str, &Range<usize>), | ||
) { | ||
let tag_name_low = tag_name.to_lowercase(); | ||
if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { | ||
// If the tag is nested inside a "<script>" or a "<style>" tag, no warning should | ||
// be emitted. | ||
let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| { | ||
let at = at.to_lowercase(); | ||
at == "script" || at == "style" | ||
}); | ||
for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) { | ||
if should_not_warn { | ||
continue; | ||
} | ||
let last_tag_name_low = last_tag_name.to_lowercase(); | ||
if ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name_low) { | ||
continue; | ||
} | ||
// `tags` is used as a queue, meaning that everything after `pos` is included inside it. | ||
// So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still | ||
// have `h3`, meaning the tag wasn't closed as it should have. | ||
f(&format!("unclosed HTML tag `{}`", last_tag_name), &last_tag_span); | ||
} | ||
// Remove the `tag_name` that was originally closed | ||
tags.pop(); | ||
} else { | ||
// It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required | ||
// but it helps for the visualization). | ||
f(&format!("unopened HTML tag `{}`", tag_name), &range); | ||
} | ||
} | ||
|
||
fn extract_tag( | ||
tags: &mut Vec<(String, Range<usize>)>, | ||
text: &str, | ||
range: Range<usize>, | ||
f: &impl Fn(&str, &Range<usize>), | ||
) { | ||
let mut iter = text.char_indices().peekable(); | ||
|
||
while let Some((start_pos, c)) = iter.next() { | ||
if c == '<' { | ||
let mut tag_name = String::new(); | ||
let mut is_closing = false; | ||
let mut prev_pos = start_pos; | ||
loop { | ||
let (pos, c) = match iter.peek() { | ||
Some((pos, c)) => (*pos, *c), | ||
// In case we reached the of the doc comment, we want to check that it's an | ||
// unclosed HTML tag. For example "/// <h3". | ||
None => (prev_pos, '\0'), | ||
}; | ||
prev_pos = pos; | ||
// Checking if this is a closing tag (like `</a>` for `<a>`). | ||
if c == '/' && tag_name.is_empty() { | ||
is_closing = true; | ||
} else if c.is_ascii_alphanumeric() { | ||
tag_name.push(c); | ||
} else { | ||
if !tag_name.is_empty() { | ||
let mut r = | ||
Range { start: range.start + start_pos, end: range.start + pos }; | ||
if c == '>' { | ||
// In case we have a tag without attribute, we can consider the span to | ||
// refer to it fully. | ||
r.end += 1; | ||
} | ||
if is_closing { | ||
// In case we have "</div >" or even "</div >". | ||
if c != '>' { | ||
if !c.is_whitespace() { | ||
// It seems like it's not a valid HTML tag. | ||
break; | ||
} | ||
let mut found = false; | ||
for (new_pos, c) in text[pos..].char_indices() { | ||
if !c.is_whitespace() { | ||
if c == '>' { | ||
r.end = range.start + new_pos + 1; | ||
found = true; | ||
} | ||
break; | ||
} | ||
} | ||
if !found { | ||
break; | ||
} | ||
} | ||
drop_tag(tags, tag_name, r, f); | ||
} else { | ||
tags.push((tag_name, r)); | ||
} | ||
} | ||
break; | ||
} | ||
iter.next(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> { | ||
fn fold_item(&mut self, item: Item) -> Option<Item> { | ||
let hir_id = match self.cx.as_local_hir_id(item.def_id) { | ||
Some(hir_id) => hir_id, | ||
None => { | ||
// If non-local, no need to check anything. | ||
return self.fold_item_recur(item); | ||
} | ||
}; | ||
let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); | ||
if !dox.is_empty() { | ||
let cx = &self.cx; | ||
let report_diag = |msg: &str, range: &Range<usize>| { | ||
let sp = match super::source_span_for_markdown_range(cx, &dox, range, &item.attrs) { | ||
Some(sp) => sp, | ||
None => span_of_attrs(&item.attrs).unwrap_or(item.source.span()), | ||
}; | ||
cx.tcx.struct_span_lint_hir(lint::builtin::INVALID_HTML_TAGS, hir_id, sp, |lint| { | ||
lint.build(msg).emit() | ||
}); | ||
}; | ||
|
||
let mut tags = Vec::new(); | ||
|
||
let p = Parser::new_ext(&dox, opts()).into_offset_iter(); | ||
|
||
for (event, range) in p { | ||
match event { | ||
Event::Html(text) => extract_tag(&mut tags, &text, range, &report_diag), | ||
_ => {} | ||
} | ||
} | ||
|
||
for (tag, range) in tags.iter().filter(|(t, _)| { | ||
let t = t.to_lowercase(); | ||
ALLOWED_UNCLOSED.iter().find(|&&at| at == t).is_none() | ||
}) { | ||
report_diag(&format!("unclosed HTML tag `{}`", tag), range); | ||
} | ||
} | ||
|
||
self.fold_item_recur(item) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.