From d29f0d23c3624047a3f3671a8e352783e8796373 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 12 May 2019 19:55:16 +0300 Subject: [PATCH 1/3] Move token tree related lexer state to a separate struct We only used a bunch of fields when tokenizing into a token tree, so let's move them out of the base lexer --- src/libsyntax/parse/lexer/mod.rs | 14 +--- src/libsyntax/parse/lexer/tokentrees.rs | 96 +++++++++++++++++-------- src/libsyntax/parse/mod.rs | 9 +-- 3 files changed, 71 insertions(+), 48 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 2882acb0e780..60494a6a2bdc 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -66,15 +66,7 @@ pub struct StringReader<'a> { span: Span, /// The raw source span which *does not* take `override_span` into account span_src_raw: Span, - /// Stack of open delimiters and their spans. Used for error message. - open_braces: Vec<(token::DelimToken, Span)>, - crate unmatched_braces: Vec<UnmatchedBrace>, - /// The type and spans for all braces - /// - /// Used only for error recovery when arriving to EOF with mismatched braces. 
- matching_delim_spans: Vec<(token::DelimToken, Span, Span)>, - crate override_span: Option<Span>, - last_unclosed_found_span: Option<Span>, + override_span: Option<Span>, } impl<'a> StringReader<'a> { @@ -254,11 +246,7 @@ impl<'a> StringReader<'a> { token: token::Eof, span: syntax_pos::DUMMY_SP, span_src_raw: syntax_pos::DUMMY_SP, - open_braces: Vec::new(), - unmatched_braces: Vec::new(), - matching_delim_spans: Vec::new(), override_span, - last_unclosed_found_span: None, } } diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs index 0db36c84cdfe..a6e176c02a09 100644 --- a/src/libsyntax/parse/lexer/tokentrees.rs +++ b/src/libsyntax/parse/lexer/tokentrees.rs @@ -1,14 +1,42 @@ +use syntax_pos::Span; + use crate::print::pprust::token_to_string; use crate::parse::lexer::{StringReader, UnmatchedBrace}; use crate::parse::{token, PResult}; use crate::tokenstream::{DelimSpan, IsJoint::*, TokenStream, TokenTree, TreeAndJoint}; impl<'a> StringReader<'a> { + crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { + let mut tt_reader = TokenTreesReader { + string_reader: self, + open_braces: Vec::new(), + unmatched_braces: Vec::new(), + matching_delim_spans: Vec::new(), + last_unclosed_found_span: None, + }; + let res = tt_reader.parse_all_token_trees(); + (res, tt_reader.unmatched_braces) + } +} + +struct TokenTreesReader<'a> { + string_reader: StringReader<'a>, + /// Stack of open delimiters and their spans. Used for error message. + open_braces: Vec<(token::DelimToken, Span)>, + unmatched_braces: Vec<UnmatchedBrace>, + /// The type and spans for all braces + /// + /// Used only for error recovery when arriving to EOF with mismatched braces. + matching_delim_spans: Vec<(token::DelimToken, Span, Span)>, + last_unclosed_found_span: Option<Span>, +} + +impl<'a> TokenTreesReader<'a> { // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. 
- crate fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { + fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { let mut tts = Vec::new(); - while self.token != token::Eof { + while self.string_reader.token != token::Eof { tts.push(self.parse_token_tree()?); } @@ -19,7 +47,7 @@ impl<'a> StringReader<'a> { fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { let mut tts = vec![]; loop { - if let token::CloseDelim(..) = self.token { + if let token::CloseDelim(..) = self.string_reader.token { return TokenStream::new(tts); } @@ -34,11 +62,12 @@ impl<'a> StringReader<'a> { } fn parse_token_tree(&mut self) -> PResult<'a, TreeAndJoint> { - let sm = self.sess.source_map(); - match self.token { + let sm = self.string_reader.sess.source_map(); + match self.string_reader.token { token::Eof => { let msg = "this file contains an un-closed delimiter"; - let mut err = self.sess.span_diagnostic.struct_span_err(self.span, msg); + let mut err = self.string_reader.sess.span_diagnostic + .struct_span_err(self.span(), msg); for &(_, sp) in &self.open_braces { err.span_label(sp, "un-closed delimiter"); } @@ -46,13 +75,12 @@ impl<'a> StringReader<'a> { if let Some((delim, _)) = self.open_braces.last() { if let Some((_, open_sp, close_sp)) = self.matching_delim_spans.iter() .filter(|(d, open_sp, close_sp)| { - - if let Some(close_padding) = sm.span_to_margin(*close_sp) { - if let Some(open_padding) = sm.span_to_margin(*open_sp) { - return delim == d && close_padding != open_padding; + if let Some(close_padding) = sm.span_to_margin(*close_sp) { + if let Some(open_padding) = sm.span_to_margin(*open_sp) { + return delim == d && close_padding != open_padding; + } } - } - false + false }).next() // these are in reverse order as they get inserted on close, but { // we want the last open/first close err.span_label( @@ -69,11 +97,11 @@ impl<'a> StringReader<'a> { }, token::OpenDelim(delim) => { // The span for beginning of the delimited section - let 
pre_span = self.span; + let pre_span = self.span(); // Parse the open delimiter. - self.open_braces.push((delim, self.span)); - self.real_token(); + self.open_braces.push((delim, self.span())); + self.string_reader.real_token(); // Parse the token trees within the delimiters. // We stop at any delimiter so we can try to recover if the user @@ -81,9 +109,9 @@ impl<'a> StringReader<'a> { let tts = self.parse_token_trees_until_close_delim(); // Expand to cover the entire delimited token tree - let delim_span = DelimSpan::from_pair(pre_span, self.span); + let delim_span = DelimSpan::from_pair(pre_span, self.span()); - match self.token { + match self.string_reader.token { // Correct delimiter. token::CloseDelim(d) if d == delim => { let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); @@ -93,26 +121,26 @@ impl<'a> StringReader<'a> { self.matching_delim_spans.clear(); } else { self.matching_delim_spans.push( - (open_brace, open_brace_span, self.span), + (open_brace, open_brace_span, self.span()), ); } // Parse the close delimiter. - self.real_token(); + self.string_reader.real_token(); } // Incorrect delimiter. token::CloseDelim(other) => { let mut unclosed_delimiter = None; let mut candidate = None; - if self.last_unclosed_found_span != Some(self.span) { + if self.last_unclosed_found_span != Some(self.span()) { // do not complain about the same unclosed delimiter multiple times - self.last_unclosed_found_span = Some(self.span); + self.last_unclosed_found_span = Some(self.span()); // This is a conservative error: only report the last unclosed // delimiter. The previous unclosed delimiters could actually be // closed! The parser just hasn't gotten to them yet. 
if let Some(&(_, sp)) = self.open_braces.last() { unclosed_delimiter = Some(sp); }; - if let Some(current_padding) = sm.span_to_margin(self.span) { + if let Some(current_padding) = sm.span_to_margin(self.span()) { for (brace, brace_span) in &self.open_braces { if let Some(padding) = sm.span_to_margin(*brace_span) { // high likelihood of these two corresponding @@ -126,7 +154,7 @@ impl<'a> StringReader<'a> { self.unmatched_braces.push(UnmatchedBrace { expected_delim: tok, found_delim: other, - found_span: self.span, + found_span: self.span(), unclosed_span: unclosed_delimiter, candidate_span: candidate, }); @@ -142,7 +170,7 @@ impl<'a> StringReader<'a> { // bar(baz( // } // Incorrect delimiter but matches the earlier `{` if !self.open_braces.iter().any(|&(b, _)| b == other) { - self.real_token(); + self.string_reader.real_token(); } } token::Eof => { @@ -162,22 +190,28 @@ impl<'a> StringReader<'a> { token::CloseDelim(_) => { // An unexpected closing delimiter (i.e., there is no // matching opening delimiter). - let token_str = token_to_string(&self.token); + let token_str = token_to_string(&self.string_reader.token); let msg = format!("unexpected close delimiter: `{}`", token_str); - let mut err = self.sess.span_diagnostic.struct_span_err(self.span, &msg); - err.span_label(self.span, "unexpected close delimiter"); + let mut err = self.string_reader.sess.span_diagnostic + .struct_span_err(self.span(), &msg); + err.span_label(self.span(), "unexpected close delimiter"); Err(err) }, _ => { - let tt = TokenTree::Token(self.span, self.token.clone()); + let tt = TokenTree::Token(self.span(), self.string_reader.token.clone()); // Note that testing for joint-ness here is done via the raw // source span as the joint-ness is a property of the raw source // rather than wanting to take `override_span` into account. 
- let raw = self.span_src_raw; - self.real_token(); - let is_joint = raw.hi() == self.span_src_raw.lo() && token::is_op(&self.token); + let raw = self.string_reader.span_src_raw; + self.string_reader.real_token(); + let is_joint = raw.hi() == self.string_reader.span_src_raw.lo() + && token::is_op(&self.string_reader.token); Ok((tt, if is_joint { Joint } else { NonJoint })) } } } + + fn span(&self) -> Span { + self.string_reader.span + } } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index be44b964ba5a..1ddafb969c4b 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -295,7 +295,7 @@ pub fn source_file_to_stream( } /// Given a source file, produces a sequence of token trees. Returns any buffered errors from -/// parsing the token tream. +/// parsing the token stream. pub fn maybe_file_to_stream( sess: &ParseSess, source_file: Lrc, @@ -303,14 +303,15 @@ pub fn maybe_file_to_stream( ) -> Result<(TokenStream, Vec), Vec> { let mut srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?; srdr.real_token(); + let (token_trees, unmatched_braces) = srdr.into_token_trees(); - match srdr.parse_all_token_trees() { - Ok(stream) => Ok((stream, srdr.unmatched_braces)), + match token_trees { + Ok(stream) => Ok((stream, unmatched_braces)), Err(err) => { let mut buffer = Vec::with_capacity(1); err.buffer(&mut buffer); // Not using `emit_unclosed_delims` to use `db.buffer` - for unmatched in srdr.unmatched_braces { + for unmatched in unmatched_braces { let mut db = sess.span_diagnostic.struct_span_err(unmatched.found_span, &format!( "incorrect close delimiter: `{}`", token_to_string(&token::Token::CloseDelim(unmatched.found_delim)), From b91e0a378690871fa744768f38d42bd90830bcd0 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 13 May 2019 12:06:37 +0300 Subject: [PATCH 2/3] move span and token to tt reader --- src/libsyntax/parse/lexer/mod.rs | 12 ----- src/libsyntax/parse/lexer/tokentrees.rs | 60 
++++++++++++++----------- src/libsyntax/parse/mod.rs | 3 +- 3 files changed, 35 insertions(+), 40 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 60494a6a2bdc..9caa9ea807c1 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -62,10 +62,6 @@ pub struct StringReader<'a> { // cache a direct reference to the source text, so that we don't have to // retrieve it via `self.source_file.src.as_ref().unwrap()` all the time. src: Lrc, - token: token::Token, - span: Span, - /// The raw source span which *does not* take `override_span` into account - span_src_raw: Span, override_span: Option<Span>, } @@ -113,8 +109,6 @@ impl<'a> StringReader<'a> { sp: self.peek_span, }; self.advance_token()?; - self.span_src_raw = self.peek_span_src_raw; - Ok(ret_val) } @@ -151,9 +145,6 @@ impl<'a> StringReader<'a> { } } - self.token = t.tok.clone(); - self.span = t.sp; - Ok(t) } @@ -243,9 +234,6 @@ impl<'a> StringReader<'a> { peek_span_src_raw: syntax_pos::DUMMY_SP, src, fatal_errs: Vec::new(), - token: token::Eof, - span: syntax_pos::DUMMY_SP, - span_src_raw: syntax_pos::DUMMY_SP, override_span, } } diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs index a6e176c02a09..1070d6dcb1b3 100644 --- a/src/libsyntax/parse/lexer/tokentrees.rs +++ b/src/libsyntax/parse/lexer/tokentrees.rs @@ -9,6 +9,8 @@ impl<'a> StringReader<'a> { crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { let mut tt_reader = TokenTreesReader { string_reader: self, + token: token::Eof, + span: syntax_pos::DUMMY_SP, open_braces: Vec::new(), unmatched_braces: Vec::new(), matching_delim_spans: Vec::new(), @@ -21,6 +23,8 @@ impl<'a> StringReader<'a> { struct TokenTreesReader<'a> { string_reader: StringReader<'a>, + token: token::Token, + span: Span, /// Stack of open delimiters and their spans. Used for error message. 
open_braces: Vec<(token::DelimToken, Span)>, unmatched_braces: Vec<UnmatchedBrace>, @@ -36,7 +40,8 @@ impl<'a> TokenTreesReader<'a> { fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { let mut tts = Vec::new(); - while self.string_reader.token != token::Eof { + self.real_token(); + while self.token != token::Eof { tts.push(self.parse_token_tree()?); } @@ -47,7 +52,7 @@ impl<'a> TokenTreesReader<'a> { fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { let mut tts = vec![]; loop { - if let token::CloseDelim(..) = self.string_reader.token { + if let token::CloseDelim(..) = self.token { return TokenStream::new(tts); } @@ -63,11 +68,11 @@ impl<'a> TokenTreesReader<'a> { fn parse_token_tree(&mut self) -> PResult<'a, TreeAndJoint> { let sm = self.string_reader.sess.source_map(); - match self.string_reader.token { + match self.token { token::Eof => { let msg = "this file contains an un-closed delimiter"; let mut err = self.string_reader.sess.span_diagnostic - .struct_span_err(self.span(), msg); + .struct_span_err(self.span, msg); for &(_, sp) in &self.open_braces { err.span_label(sp, "un-closed delimiter"); } @@ -97,11 +102,11 @@ impl<'a> TokenTreesReader<'a> { }, token::OpenDelim(delim) => { // The span for beginning of the delimited section - let pre_span = self.span(); + let pre_span = self.span; // Parse the open delimiter. - self.open_braces.push((delim, self.span())); - self.string_reader.real_token(); + self.open_braces.push((delim, self.span)); + self.real_token(); // Parse the token trees within the delimiters. // We stop at any delimiter so we can try to recover if the user @@ -109,9 +114,9 @@ impl<'a> TokenTreesReader<'a> { let tts = self.parse_token_trees_until_close_delim(); // Expand to cover the entire delimited token tree - let delim_span = DelimSpan::from_pair(pre_span, self.span()); + let delim_span = DelimSpan::from_pair(pre_span, self.span); - match self.string_reader.token { + match self.token { // Correct delimiter. 
token::CloseDelim(d) if d == delim => { let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); @@ -121,26 +126,26 @@ impl<'a> TokenTreesReader<'a> { self.matching_delim_spans.clear(); } else { self.matching_delim_spans.push( - (open_brace, open_brace_span, self.span()), + (open_brace, open_brace_span, self.span), ); } // Parse the close delimiter. - self.string_reader.real_token(); + self.real_token(); } // Incorrect delimiter. token::CloseDelim(other) => { let mut unclosed_delimiter = None; let mut candidate = None; - if self.last_unclosed_found_span != Some(self.span()) { + if self.last_unclosed_found_span != Some(self.span) { // do not complain about the same unclosed delimiter multiple times - self.last_unclosed_found_span = Some(self.span()); + self.last_unclosed_found_span = Some(self.span); // This is a conservative error: only report the last unclosed // delimiter. The previous unclosed delimiters could actually be // closed! The parser just hasn't gotten to them yet. 
if let Some(&(_, sp)) = self.open_braces.last() { unclosed_delimiter = Some(sp); }; - if let Some(current_padding) = sm.span_to_margin(self.span()) { + if let Some(current_padding) = sm.span_to_margin(self.span) { for (brace, brace_span) in &self.open_braces { if let Some(padding) = sm.span_to_margin(*brace_span) { // high likelihood of these two corresponding @@ -154,7 +159,7 @@ impl<'a> TokenTreesReader<'a> { self.unmatched_braces.push(UnmatchedBrace { expected_delim: tok, found_delim: other, - found_span: self.span(), + found_span: self.span, unclosed_span: unclosed_delimiter, candidate_span: candidate, }); @@ -170,7 +175,7 @@ impl<'a> TokenTreesReader<'a> { // bar(baz( // } // Incorrect delimiter but matches the earlier `{` if !self.open_braces.iter().any(|&(b, _)| b == other) { - self.string_reader.real_token(); + self.real_token(); } } token::Eof => { @@ -190,28 +195,31 @@ impl<'a> TokenTreesReader<'a> { token::CloseDelim(_) => { // An unexpected closing delimiter (i.e., there is no // matching opening delimiter). - let token_str = token_to_string(&self.string_reader.token); + let token_str = token_to_string(&self.token); let msg = format!("unexpected close delimiter: `{}`", token_str); let mut err = self.string_reader.sess.span_diagnostic - .struct_span_err(self.span(), &msg); - err.span_label(self.span(), "unexpected close delimiter"); + .struct_span_err(self.span, &msg); + err.span_label(self.span, "unexpected close delimiter"); Err(err) }, _ => { - let tt = TokenTree::Token(self.span(), self.string_reader.token.clone()); + let tt = TokenTree::Token(self.span, self.token.clone()); // Note that testing for joint-ness here is done via the raw // source span as the joint-ness is a property of the raw source // rather than wanting to take `override_span` into account. 
- let raw = self.string_reader.span_src_raw; - self.string_reader.real_token(); - let is_joint = raw.hi() == self.string_reader.span_src_raw.lo() - && token::is_op(&self.string_reader.token); + let raw = self.string_reader.peek_span_src_raw; + self.real_token(); + let is_joint = raw.hi() == self.string_reader.peek_span_src_raw.lo() + && token::is_op(&self.token); Ok((tt, if is_joint { Joint } else { NonJoint })) } } } - fn span(&self) -> Span { - self.string_reader.span + fn real_token(&mut self) { + let t = self.string_reader.real_token(); + self.token = t.tok; + self.span = t.sp; } } + diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 1ddafb969c4b..4a9a7aec6add 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -301,8 +301,7 @@ pub fn maybe_file_to_stream( source_file: Lrc, override_span: Option<Span>, ) -> Result<(TokenStream, Vec), Vec> { - let mut srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?; - srdr.real_token(); + let srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?; let (token_trees, unmatched_braces) = srdr.into_token_trees(); match token_trees { From e249f2e526cca687b78a766769c481cfb638f02e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 13 May 2019 14:30:18 +0300 Subject: [PATCH 3/3] move raw span to tt reader See https://github.com/rust-lang/rust/pull/50838/files#r283296243 for an explanation of how jointness checking works with the *next* pair --- src/libsyntax/parse/lexer/tokentrees.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs index 1070d6dcb1b3..4bfc5bb16c0b 100644 --- a/src/libsyntax/parse/lexer/tokentrees.rs +++ b/src/libsyntax/parse/lexer/tokentrees.rs @@ -207,6 +207,8 @@ impl<'a> TokenTreesReader<'a> { // Note that testing for joint-ness here is done via the raw // source span as the joint-ness is a property of the raw source 
// rather than wanting to take `override_span` into account. + // Additionally, we actually check if the *next* pair of tokens + // is joint, but this is equivalent to checking the current pair. let raw = self.string_reader.peek_span_src_raw; self.real_token(); let is_joint = raw.hi() == self.string_reader.peek_span_src_raw.lo() @@ -222,4 +224,3 @@ impl<'a> TokenTreesReader<'a> { self.span = t.sp; } } -