Skip to content

Commit

Permalink
Implement re-lexing logic for better error recovery
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Jun 12, 2024
1 parent 60ea72a commit 9e56495
Show file tree
Hide file tree
Showing 13 changed files with 102 additions and 17 deletions.
41 changes: 41 additions & 0 deletions crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1307,6 +1307,47 @@ impl<'src> Lexer<'src> {
}
}

/// Re-lex the current token in the context of a logical line.
///
/// Starting at the beginning of the current token, the source is scanned backwards over the
/// run of whitespace and line-break characters (`\n`, `\r`) that immediately precedes it. If
/// that run contains at least one line break, the lexer is moved back to the start of the run
/// and the next token is lexed from there.
///
/// Returns `true` if the lexer was moved back and a new current token was lexed, and `false`
/// otherwise.
///
/// This method is a no-op if the lexer isn't in a parenthesized context (the nesting level is
/// zero). Otherwise, the nesting level is reduced by one regardless of whether any re-lexing
/// takes place.
pub(crate) fn re_lex_logical_token(&mut self) -> bool {
    if self.nesting == 0 {
        return false;
    }

    // Reduce the nesting level because the parser recovered from an error inside list parsing.
    self.nesting -= 1;

    let token_start = self.current_range().start();
    let mut rewind_to = token_start;
    let mut saw_line_break = false;

    // Walk backwards, character by character, until something that is neither whitespace nor
    // a line break is found.
    for ch in self.source[..token_start.to_usize()].chars().rev() {
        if is_python_whitespace(ch) {
            rewind_to -= ch.text_len();
        } else if matches!(ch, '\n' | '\r') {
            saw_line_break = true;
            rewind_to -= ch.text_len();
        } else {
            break;
        }
    }

    // Only rewind when the preceding run actually contained a line break.
    if rewind_to == token_start || !saw_line_break {
        return false;
    }

    // Restart the cursor at the rewind position and lex the next token from there.
    self.cursor = Cursor::new(self.source);
    self.cursor.skip_bytes(rewind_to.to_usize());
    self.state = State::Other;
    self.next_token();
    true
}

#[inline]
fn token_range(&self) -> TextRange {
let end = self.offset();
Expand Down
12 changes: 11 additions & 1 deletion crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,10 @@ impl<'src> Parser<'src> {
}

self.expect(TokenKind::Comma);
} else if recovery_context_kind.is_list_terminator(self) {
} else if recovery_context_kind
.list_terminator_kind(self)
.is_some_and(ListTerminatorKind::is_regular)
{
break;
} else {
// Not a recognised element. Add an error and either skip the token or break
Expand All @@ -570,6 +573,7 @@ impl<'src> Parser<'src> {
// Run the error recovery: This also handles the case when an element is missing
// between two commas: `a,,b`
if self.is_enclosing_list_element_or_terminator() {
self.tokens.re_lex_logical_token();
break;
}

Expand Down Expand Up @@ -786,6 +790,12 @@ enum ListTerminatorKind {
ErrorRecovery,
}

impl ListTerminatorKind {
const fn is_regular(self) -> bool {
matches!(self, ListTerminatorKind::Regular)
}
}

#[derive(Copy, Clone, Debug)]
enum RecoveryContextKind {
/// When parsing a list of statements at the module level i.e., at the top level of a file.
Expand Down
19 changes: 18 additions & 1 deletion crates/ruff_python_parser/src/token_source.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use ruff_text_size::{TextRange, TextSize};
use ruff_text_size::{Ranged, TextRange, TextSize};

use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue};
use crate::{Mode, TokenKind};
Expand Down Expand Up @@ -58,6 +58,23 @@ impl<'src> TokenSource<'src> {
self.lexer.take_value()
}

/// Calls the underlying [`re_lex_logical_token`] method on the lexer and updates the token
/// vector accordingly.
///
/// If the lexer reports that it re-lexed, every token at the end of the token vector that
/// starts at or after the start of the new current token is removed.
///
/// [`re_lex_logical_token`]: Lexer::re_lex_logical_token
pub(crate) fn re_lex_logical_token(&mut self) {
    if !self.lexer.re_lex_logical_token() {
        return;
    }

    let relexed_start = self.current_range().start();

    // Pop from the back until a token that starts before the re-lexed token is found.
    while let Some(last) = self.tokens.last() {
        if last.start() < relexed_start {
            break;
        }
        self.tokens.pop();
    }
}

/// Returns the next non-trivia token without consuming it.
///
/// Use [`peek2`] to get the next two tokens.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ Module(


|
1 | call(
| ^ Syntax Error: Expected ')', found newline
2 |
3 | def foo():
4 | pass
| Syntax Error: unexpected EOF while parsing
|
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ Module(


|
1 | call(x
| ^ Syntax Error: Expected ')', found newline
2 |
3 | def foo():
4 | pass
| Syntax Error: unexpected EOF while parsing
|
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ Module(


|
1 | call(x,
| ^ Syntax Error: Expected ')', found newline
2 |
3 | def foo():
4 | pass
| Syntax Error: unexpected EOF while parsing
|
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ Module(


|
1 | {x: 1,
| ^ Syntax Error: Expected '}', found newline
2 |
3 | def foo():
4 | pass
| Syntax Error: unexpected EOF while parsing
|
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ Module(


|
2 | # token starts a statement.
3 |
4 | [1, 2
| ^ Syntax Error: Expected ']', found newline
5 |
6 | def foo():
7 | pass
| Syntax Error: unexpected EOF while parsing
|
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,11 @@ Module(


|
2 | # token starts a statement.
3 |
4 | (1, 2
| ^ Syntax Error: Expected ')', found newline
5 |
6 | def foo():
7 | pass
| Syntax Error: unexpected EOF while parsing
|
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ Module(


|
2 | # token starts a statement.
3 |
4 | {1, 2
| ^ Syntax Error: Expected '}', found newline
5 |
6 | def foo():
7 | pass
| Syntax Error: unexpected EOF while parsing
|
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ Module(

|
1 | from x import (a, b
| ^ Syntax Error: Expected ')', found newline
2 | 1 + 1
3 | from x import (a, b,
| ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons
4 | 2 + 2
|

Expand All @@ -156,6 +156,9 @@ Module(


|
1 | from x import (a, b
2 | 1 + 1
3 | from x import (a, b,
| ^ Syntax Error: Expected ')', found newline
4 | 2 + 2
|
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ Module(
|
1 | def foo # comment
2 | def bar(): ...
| ^^^ Syntax Error: Expected ')', found 'def'
| ^^^ Syntax Error: Expected a parameter or the end of the parameter list
3 | def baz
|

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,3 @@ Module(
5 | with (item1, item2: ...
| ^ Syntax Error: Expected ',', found ':'
|


|
4 | with (item1, item2 as f1 item3, item4): ...
5 | with (item1, item2: ...
|

0 comments on commit 9e56495

Please sign in to comment.