From 8e10ffca656fb6473f8c73a0d1eeea98627be380 Mon Sep 17 00:00:00 2001 From: Boshen Date: Sun, 15 Sep 2024 17:23:11 +0800 Subject: [PATCH] feat(parser): calculate leading or trailing position for comments --- crates/oxc_ast/src/lib.rs | 2 +- crates/oxc_ast/src/trivia.rs | 57 ++++++- crates/oxc_parser/examples/parser.rs | 4 +- crates/oxc_parser/src/lexer/mod.rs | 1 + crates/oxc_parser/src/lexer/trivia_builder.rs | 156 +++++++++++++++++- crates/oxc_parser/src/lexer/whitespace.rs | 1 + 6 files changed, 210 insertions(+), 11 deletions(-) diff --git a/crates/oxc_ast/src/lib.rs b/crates/oxc_ast/src/lib.rs index 84fe417b50ae2e..006fa94352287d 100644 --- a/crates/oxc_ast/src/lib.rs +++ b/crates/oxc_ast/src/lib.rs @@ -61,7 +61,7 @@ pub use crate::{ ast_builder::AstBuilder, ast_builder_impl::NONE, ast_kind::{AstKind, AstType}, - trivia::{Comment, CommentKind, SortedComments, Trivias}, + trivia::{Comment, CommentKind, CommentPosition, SortedComments, Trivias}, visit::{Visit, VisitMut}, }; diff --git a/crates/oxc_ast/src/trivia.rs b/crates/oxc_ast/src/trivia.rs index 834545b8b12957..f9ffb8f6d77266 100644 --- a/crates/oxc_ast/src/trivia.rs +++ b/crates/oxc_ast/src/trivia.rs @@ -14,20 +14,63 @@ pub enum CommentKind { Block, } +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum CommentPosition { + /// Comments prior to a token until another token or trailing comment. + /// + /// e.g. + /// + /// ``` + /// /* leading */ token; + /// /* leading */ + /// // leading + /// token; + /// ``` + Leading, + + /// Comments tailing a token until a newline. + /// e.g. `token /* trailing */ // trailing` + Trailing, +} + /// Single or multiline comment -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub struct Comment { /// The span of the comment text (without leading/trailing delimiters). 
pub span: Span, + /// Line or block comment pub kind: CommentKind, + + /// Leading or trailing comment + pub position: CommentPosition, + + /// Start of token this leading comment is attached to. + /// `/* Leading */ token` + /// ^ This start + /// NOTE: Trailing comment attachment is not computed yet. + pub attached_to: u32, + + /// Whether this comment has a preceding newline. + /// Used to avoid becoming a trailing comment in codegen. + pub preceded_by_newline: bool, + + /// Whether this comment has a trailing newline. + pub followed_by_newline: bool, } impl Comment { #[inline] pub fn new(start: u32, end: u32, kind: CommentKind) -> Self { let span = Span::new(start, end); - Self { span, kind } + Self { + span, + kind, + position: CommentPosition::Trailing, + attached_to: 0, + preceded_by_newline: false, + followed_by_newline: false, + } } pub fn is_line(self) -> bool { @@ -38,6 +81,14 @@ impl Comment { self.kind == CommentKind::Block } + pub fn is_leading(self) -> bool { + self.position == CommentPosition::Leading + } + + pub fn is_trailing(self) -> bool { + self.position == CommentPosition::Trailing + } + pub fn real_span(&self) -> Span { Span::new(self.real_span_start(), self.real_span_end()) } @@ -55,8 +106,6 @@ impl Comment { } } -impl CommentKind {} - /// Sorted set of unique trivia comments, in ascending order by starting position. 
pub type SortedComments = Box<[Comment]>; diff --git a/crates/oxc_parser/examples/parser.rs b/crates/oxc_parser/examples/parser.rs index fe3f8639881686..55638a38d6ca7d 100644 --- a/crates/oxc_parser/examples/parser.rs +++ b/crates/oxc_parser/examples/parser.rs @@ -35,8 +35,8 @@ fn main() -> Result<(), String> { if show_comments { println!("Comments:"); for comment in ret.trivias.comments() { - let s = comment.real_span().source_text(&source_text); - println!("{s}"); + // let s = comment.real_span().source_text(&source_text); + println!("{comment:?}"); } } diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 307f7287c04aff..5a0d7a6c7b44f6 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -218,6 +218,7 @@ impl<'a> Lexer<'a> { self.token.end = self.offset(); debug_assert!(self.token.start <= self.token.end); let token = self.token; + self.trivia_builder.handle_token(token.start); self.token = Token::default(); token } diff --git a/crates/oxc_parser/src/lexer/trivia_builder.rs b/crates/oxc_parser/src/lexer/trivia_builder.rs index bb724ea0b70a4b..2bb051a6c4e343 100644 --- a/crates/oxc_parser/src/lexer/trivia_builder.rs +++ b/crates/oxc_parser/src/lexer/trivia_builder.rs @@ -1,13 +1,21 @@ -use oxc_ast::{Comment, CommentKind, Trivias}; +use oxc_ast::{Comment, CommentKind, CommentPosition, Trivias}; use oxc_span::Span; #[derive(Debug, Default)] pub struct TriviaBuilder { - // NOTE(lucab): This is a set of unique comments. Duplicated + // This is a set of unique comments. Duplicated // comments could be generated in case of rewind; they are // filtered out at insertion time. 
pub(crate) comments: Vec<Comment>, + irregular_whitespaces: Vec<Span>, + + // states + /// index of processed comments + processed: usize, + + /// Saw a newline before this position + saw_newline: bool, } impl TriviaBuilder { @@ -15,6 +23,10 @@ impl TriviaBuilder { Trivias::new(self.comments.into_boxed_slice(), self.irregular_whitespaces) } + pub fn add_irregular_whitespace(&mut self, start: u32, end: u32) { + self.irregular_whitespaces.push(Span::new(start, end)); + } + pub fn add_single_line_comment(&mut self, start: u32, end: u32) { // skip leading `//` self.add_comment(Comment::new(start + 2, end, CommentKind::Line)); @@ -25,6 +37,34 @@ impl TriviaBuilder { self.add_comment(Comment::new(start + 2, end - 2, CommentKind::Block)); } + // For block comments only. This function is not called after line comments because the lexer skips + // newline after line comments. + pub fn handle_newline(&mut self) { + // The last unprocessed comment is on a newline. + if self.processed < self.comments.len() { + if let Some(last_comment) = self.comments.last_mut() { + last_comment.followed_by_newline = true; + } + } + if !self.saw_newline { + self.processed = self.comments.len(); + } + self.saw_newline = true; + } + + pub fn handle_token(&mut self, token_start: u32) { + let len = self.comments.len(); + if self.processed < len { + // All unprocessed preceding comments are leading comments attached to this token start. + for comment in &mut self.comments[self.processed..] { + comment.position = CommentPosition::Leading; + comment.attached_to = token_start; + } + self.processed = len; + } + self.saw_newline = false; + } + fn add_comment(&mut self, comment: Comment) { // The comments array is an ordered vec, only add the comment if its not added before, // to avoid situations where the parser needs to rewind and tries to reinsert the comment. @@ -33,10 +73,118 @@ impl TriviaBuilder { return; } } + + let mut comment = comment; + // This newly added comment may be preceded by a newline. 
+ comment.preceded_by_newline = self.saw_newline; + if comment.is_line() { + // A line comment is always followed by a newline. This is never set in `handle_newline`. + comment.followed_by_newline = true; + // A line comment is trailing when it is not preceded by a newline. + if !self.saw_newline { + self.processed = self.comments.len() + 1; // +1 to include this comment. + } + self.saw_newline = true; + } + self.comments.push(comment); } +} + +#[cfg(test)] +mod test { + use crate::Parser; + use oxc_allocator::Allocator; + use oxc_ast::{Comment, CommentKind, CommentPosition}; + use oxc_span::{SourceType, Span}; + + #[test] + fn comment_attachments() { + let allocator = Allocator::default(); + let source_type = SourceType::default(); + let source_text = " + /* Leading 1 */ + // Leading 2 + /* Leading 3 */ token /* Trailing 1 */ // Trailing 2 + // Leading of EOF token + "; + let ret = Parser::new(&allocator, source_text, source_type).parse(); + let comments = ret.trivias.comments().copied().collect::<Vec<_>>(); + let expected = [ + Comment { + span: Span::new(11, 22), + kind: CommentKind::Block, + position: CommentPosition::Leading, + attached_to: 70, + preceded_by_newline: true, + followed_by_newline: true, + }, + Comment { + span: Span::new(35, 45), + kind: CommentKind::Line, + position: CommentPosition::Leading, + attached_to: 70, + preceded_by_newline: true, + followed_by_newline: true, + }, + Comment { + span: Span::new(56, 67), + kind: CommentKind::Block, + position: CommentPosition::Leading, + attached_to: 70, + preceded_by_newline: true, + followed_by_newline: false, + }, + Comment { + span: Span::new(78, 90), + kind: CommentKind::Block, + position: CommentPosition::Trailing, + attached_to: 0, + preceded_by_newline: false, + followed_by_newline: false, + }, + Comment { + span: Span::new(95, 106), + kind: CommentKind::Line, + position: CommentPosition::Trailing, + attached_to: 0, + preceded_by_newline: false, + followed_by_newline: true, + }, + Comment { + span: 
Span::new(117, 138), + kind: CommentKind::Line, + position: CommentPosition::Leading, + attached_to: 147, + preceded_by_newline: true, + followed_by_newline: true, + }, + ]; + + assert_eq!(comments.len(), expected.len()); + for (comment, expected) in comments.iter().copied().zip(expected) { + assert_eq!(comment, expected, "{}", comment.real_span().source_text(source_text)); + } + } - pub fn add_irregular_whitespace(&mut self, start: u32, end: u32) { - self.irregular_whitespaces.push(Span::new(start, end)); + #[test] + fn comment_attachments2() { + let allocator = Allocator::default(); + let source_type = SourceType::default(); + let source_text = " +/* Leading 1 */ +token + "; + let ret = Parser::new(&allocator, source_text, source_type).parse(); + let comments = ret.trivias.comments().copied().collect::<Vec<_>>(); + let expected = vec![Comment { + span: Span::new(3, 14), + kind: CommentKind::Block, + position: CommentPosition::Leading, + attached_to: 17, + preceded_by_newline: true, + followed_by_newline: true, + }]; + assert_eq!(comments, expected); } } diff --git a/crates/oxc_parser/src/lexer/whitespace.rs b/crates/oxc_parser/src/lexer/whitespace.rs index 577c12aec2bb3f..c6e8403e25b593 100644 --- a/crates/oxc_parser/src/lexer/whitespace.rs +++ b/crates/oxc_parser/src/lexer/whitespace.rs @@ -9,6 +9,7 @@ static NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE: SafeByteMatchTable = impl<'a> Lexer<'a> { pub(super) fn line_break_handler(&mut self) -> Kind { self.token.is_on_new_line = true; + self.trivia_builder.handle_newline(); // Indentation is common after a line break. // Consume it, along with any further line breaks.