Skip to content

Commit

Permalink
Update RUF001, RUF003 to check in f-strings (#7477)
Browse files Browse the repository at this point in the history
## Summary

This PR updates the rules `RUF001` and `RUF003` to check inside f-strings using the
`FStringMiddle` token, which contains the non-expression part of an f-string.

For reference,
| Code | Name | Message|
| --- | --- | --- |
| RUF001 | ambiguous-unicode-character-string | String contains ambiguous {}. Did you mean {}? |
| RUF003 | ambiguous-unicode-character-comment | Comment contains ambiguous {}. Did you mean {}? |

## Test Plan

`cargo test`
  • Loading branch information
dhruvmanila authored Sep 19, 2023
1 parent 1b2ef3e commit 50d1553
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 16 deletions.
16 changes: 16 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/confusables.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,19 @@ def f():
# consisting of a single ambiguous character, while the second character is a "word
# boundary" (whitespace) that it itself ambiguous.
x = "Р усский"

# Same test cases as above but using f-strings instead:
x = f"𝐁ad string"
x = f"−"
x = f"Русский"
x = f"βα Bαd"
x = f"Р усский"

# Nested f-strings
x = f"𝐁ad string {f" {f"Р усский"}"}"

# Comments inside f-strings
x = f"string { # And here's a comment with an unusual parenthesis: ）
# And here's a comment with a greek alpha: ∗
foo # And here's a comment with an unusual punctuation mark: ᜵
}"
34 changes: 18 additions & 16 deletions crates/ruff/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,25 @@ pub(crate) fn check_tokens(
let mut state_machine = StateMachine::default();
for &(ref tok, range) in tokens.iter().flatten() {
let is_docstring = state_machine.consume(tok);
if matches!(tok, Tok::String { .. } | Tok::Comment(_)) {
ruff::rules::ambiguous_unicode_character(
&mut diagnostics,
locator,
range,
if tok.is_string() {
if is_docstring {
Context::Docstring
} else {
Context::String
}
let context = match tok {
Tok::String { .. } => {
if is_docstring {
Context::Docstring
} else {
Context::Comment
},
settings,
);
}
Context::String
}
}
Tok::FStringMiddle { .. } => Context::String,
Tok::Comment(_) => Context::Comment,
_ => continue,
};
ruff::rules::ambiguous_unicode_character(
&mut diagnostics,
locator,
range,
context,
settings,
);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ impl Violation for AmbiguousUnicodeCharacterComment {
}
}

/// RUF001, RUF002, RUF003
pub(crate) fn ambiguous_unicode_character(
diagnostics: &mut Vec<Diagnostic>,
locator: &Locator,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ confusables.py:31:6: RUF001 String contains ambiguous `Р` (CYRILLIC CAPITAL LET
30 | # boundary" (whitespace) that it itself ambiguous.
31 | x = "Р усский"
| ^ RUF001
32 |
33 | # Same test cases as above but using f-strings instead:
|

confusables.py:31:7: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)?
Expand All @@ -57,6 +59,100 @@ confusables.py:31:7: RUF001 String contains ambiguous ` ` (EN QUAD). Did you m
30 | # boundary" (whitespace) that it itself ambiguous.
31 | x = "Р усский"
| ^ RUF001
32 |
33 | # Same test cases as above but using f-strings instead:
|

confusables.py:34:7: RUF001 String contains ambiguous `𝐁` (MATHEMATICAL BOLD CAPITAL B). Did you mean `B` (LATIN CAPITAL LETTER B)?
|
33 | # Same test cases as above but using f-strings instead:
34 | x = f"𝐁ad string"
| ^ RUF001
35 | x = f"−"
36 | x = f"Русский"
|

confusables.py:37:11: RUF001 String contains ambiguous `α` (GREEK SMALL LETTER ALPHA). Did you mean `a` (LATIN SMALL LETTER A)?
|
35 | x = f"−"
36 | x = f"Русский"
37 | x = f"βα Bαd"
| ^ RUF001
38 | x = f"Р усский"
|

confusables.py:38:7: RUF001 String contains ambiguous `Р` (CYRILLIC CAPITAL LETTER ER). Did you mean `P` (LATIN CAPITAL LETTER P)?
|
36 | x = f"Русский"
37 | x = f"βα Bαd"
38 | x = f"Р усский"
| ^ RUF001
39 |
40 | # Nested f-strings
|

confusables.py:38:8: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)?
|
36 | x = f"Русский"
37 | x = f"βα Bαd"
38 | x = f"Р усский"
| ^ RUF001
39 |
40 | # Nested f-strings
|

confusables.py:41:7: RUF001 String contains ambiguous `𝐁` (MATHEMATICAL BOLD CAPITAL B). Did you mean `B` (LATIN CAPITAL LETTER B)?
|
40 | # Nested f-strings
41 | x = f"𝐁ad string {f" {f"Р усский"}"}"
| ^ RUF001
42 |
43 | # Comments inside f-strings
|

confusables.py:41:21: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)?
|
40 | # Nested f-strings
41 | x = f"𝐁ad string {f" {f"Р усский"}"}"
| ^ RUF001
42 |
43 | # Comments inside f-strings
|

confusables.py:41:25: RUF001 String contains ambiguous `Р` (CYRILLIC CAPITAL LETTER ER). Did you mean `P` (LATIN CAPITAL LETTER P)?
|
40 | # Nested f-strings
41 | x = f"𝐁ad string {f" {f"Р усский"}"}"
| ^ RUF001
42 |
43 | # Comments inside f-strings
|

confusables.py:41:26: RUF001 String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)?
|
40 | # Nested f-strings
41 | x = f"𝐁ad string {f" {f"Р усский"}"}"
| ^ RUF001
42 |
43 | # Comments inside f-strings
|

confusables.py:44:68: RUF003 Comment contains ambiguous `）` (FULLWIDTH RIGHT PARENTHESIS). Did you mean `)` (RIGHT PARENTHESIS)?
|
43 | # Comments inside f-strings
44 | x = f"string { # And here's a comment with an unusual parenthesis: ）
| ^^ RUF003
45 | # And here's a comment with a greek alpha: ∗
46 | foo # And here's a comment with an unusual punctuation mark: ᜵
|

confusables.py:46:62: RUF003 Comment contains ambiguous `᜵` (PHILIPPINE SINGLE PUNCTUATION). Did you mean `/` (SOLIDUS)?
|
44 | x = f"string { # And here's a comment with an unusual parenthesis: ）
45 | # And here's a comment with a greek alpha: ∗
46 | foo # And here's a comment with an unusual punctuation mark: ᜵
| ^ RUF003
47 | }"
|


0 comments on commit 50d1553

Please sign in to comment.