Skip to content

Commit

Permalink
Update W605 to check in f-strings (#7329)
Browse files Browse the repository at this point in the history
This PR updates the `W605` (invalid-escape-sequence) to check inside
f-strings. It also adds support to report violation on invalid escape
sequence within f-strings w.r.t. the curly braces. So, the following
cases will be identified:

```python
f"\{1}"
f"\{{1}}"
f"{1:\}"
```

The new CPython parser also gives out a syntax warning for such cases:

```
fstring.py:1: SyntaxWarning: invalid escape sequence '\{'
  f"\{1}"
fstring.py:2: SyntaxWarning: invalid escape sequence '\{'
  f"\{{1}}"
fstring.py:3: SyntaxWarning: invalid escape sequence '\}'
  f"{1:\}"
```

Nested f-strings are supported here, so the generated fix is aware of that
and will create an edit for the proper f-string.

Add new test cases for f-strings.

fixes: #7295
  • Loading branch information
dhruvmanila committed Sep 20, 2023
1 parent 70fba75 commit 9c75fa3
Show file tree
Hide file tree
Showing 5 changed files with 355 additions and 29 deletions.
54 changes: 54 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Same as `W605_0.py` but using f-strings instead.

#: W605:1:10
regex = f'\.png$'

#: W605:2:1
regex = f'''
\.png$
'''

#: W605:2:6
f(
f'\_'
)

#: W605:4:6
f"""
multi-line
literal
with \_ somewhere
in the middle
"""

#: W605:1:38
value = f'new line\nand invalid escape \_ here'


#: Okay
regex = fr'\.png$'
regex = f'\\.png$'
regex = fr'''
\.png$
'''
regex = fr'''
\\.png$
'''
s = f'\\'
regex = f'\w' # noqa
regex = f'''
\w
''' # noqa

regex = f'\\\_'
value = f'\{{1}}'
value = f'\{1}'
value = f'{1:\}'
value = f"{f"\{1}"}"
value = rf"{f"\{1}"}"

# Okay
value = rf'\{{1}}'
value = rf'\{1}'
value = rf'{1:\}'
value = f"{rf"\{1}"}"
16 changes: 8 additions & 8 deletions crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,14 @@ pub(crate) fn check_tokens(

if settings.rules.enabled(Rule::InvalidEscapeSequence) {
for (tok, range) in tokens.iter().flatten() {
if tok.is_string() {
pycodestyle::rules::invalid_escape_sequence(
&mut diagnostics,
locator,
*range,
settings.rules.should_fix(Rule::InvalidEscapeSequence),
);
}
pycodestyle::rules::invalid_escape_sequence(
&mut diagnostics,
locator,
indexer,
tok,
*range,
settings.rules.should_fix(Rule::InvalidEscapeSequence),
);
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ mod tests {
#[test_case(Rule::BlankLineWithWhitespace, Path::new("W29.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_0.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501.py"))]
#[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use memchr::memchr_iter;

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_index::Indexer;
use ruff_python_parser::Tok;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

Expand Down Expand Up @@ -45,29 +46,32 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
pub(crate) fn invalid_escape_sequence(
diagnostics: &mut Vec<Diagnostic>,
locator: &Locator,
range: TextRange,
indexer: &Indexer,
token: &Tok,
token_range: TextRange,
autofix: bool,
) {
let text = locator.slice(range);

// Determine whether the string is single- or triple-quoted.
let Some(leading_quote) = leading_quote(text) else {
return;
};
let Some(trailing_quote) = trailing_quote(text) else {
return;
let token_source_code = match token {
Tok::FStringMiddle { value, is_raw } => {
if *is_raw {
return;
}
value.as_str()
}
Tok::String { kind, .. } => {
if kind.is_raw() {
return;
}
locator.slice(token_range)
}
_ => return,
};
let body = &text[leading_quote.len()..text.len() - trailing_quote.len()];

if leading_quote.contains(['r', 'R']) {
return;
}

let mut contains_valid_escape_sequence = false;
let mut invalid_escape_sequence = Vec::new();

let mut prev = None;
let bytes = body.as_bytes();
let bytes = token_source_code.as_bytes();
for i in memchr_iter(b'\\', bytes) {
// If the previous character was also a backslash, skip.
if prev.is_some_and(|prev| prev == i - 1) {
Expand All @@ -77,9 +81,38 @@ pub(crate) fn invalid_escape_sequence(

prev = Some(i);

let Some(next_char) = body[i + 1..].chars().next() else {
let next_char = match token_source_code[i + 1..].chars().next() {
Some(next_char) => next_char,
None if token.is_f_string_middle() => {
// If we're at the end of a f-string middle token, the next character
// is actually emitted as a different token. For example,
//
// ```python
// f"\{1}"
// ```
//
// is lexed as `FStringMiddle('\\')` and `LBrace` (ignoring irrelevant
// tokens), so we need to check the next character in the source code.
//
// Now, if we're at the end of the f-string itself, the lexer wouldn't
// have emitted the `FStringMiddle` token in the first place. For example,
//
// ```python
// f"foo\"
// ```
//
// Here, there won't be any `FStringMiddle` because it's an unterminated
// f-string. This means that if there's a `FStringMiddle` token and we
// encounter a `\` character, then the next character is always going to
// be part of the f-string.
if let Some(next_char) = locator.after(token_range.end()).chars().next() {
next_char
} else {
continue;
}
}
// If we're at the end of the file, skip.
continue;
None => continue,
};

// If we're at the end of line, skip.
Expand Down Expand Up @@ -120,7 +153,7 @@ pub(crate) fn invalid_escape_sequence(
continue;
}

let location = range.start() + leading_quote.text_len() + TextSize::try_from(i).unwrap();
let location = token_range.start() + TextSize::try_from(i).unwrap();
let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
invalid_escape_sequence.push(Diagnostic::new(InvalidEscapeSequence(next_char), range));
}
Expand All @@ -135,14 +168,25 @@ pub(crate) fn invalid_escape_sequence(
)));
}
} else {
let tok_start = if token.is_f_string_middle() {
// SAFETY: If this is a `FStringMiddle` token, then the indexer
// must have the f-string range.
indexer
.fstring_ranges()
.innermost(token_range.start())
.unwrap()
.start()
} else {
token_range.start()
};
// Turn into raw string.
for diagnostic in &mut invalid_escape_sequence {
// If necessary, add a space between any leading keyword (`return`, `yield`,
// `assert`, etc.) and the string. For example, `return"foo"` is valid, but
// `returnr"foo"` is not.
diagnostic.set_fix(Fix::automatic(Edit::insertion(
pad_start("r".to_string(), range.start(), locator),
range.start(),
pad_start("r".to_string(), tok_start, locator),
tok_start,
)));
}
}
Expand Down
Loading

0 comments on commit 9c75fa3

Please sign in to comment.