From 8a15678ad03d0b03a3e3ae337a66907c73c10131 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 17 Jan 2024 14:09:21 +0000 Subject: [PATCH 01/21] [Proof of concept] Add a rule/autofix to sort `__slots__` and `__match_args__` --- .../resources/test/fixtures/ruff/RUF023.py | 141 +++++++++ .../src/checkers/ast/analyze/statement.rs | 6 + crates/ruff_linter/src/codes.rs | 1 + crates/ruff_linter/src/rules/ruff/mod.rs | 1 + .../ruff_linter/src/rules/ruff/rules/mod.rs | 3 + .../src/rules/ruff/rules/sort_dunder_all.rs | 149 ++-------- .../src/rules/ruff/rules/sort_dunder_slots.rs | 199 +++++++++++++ .../src/rules/ruff/rules/sorting_helpers.rs | 202 +++++++++++++ ..._rules__ruff__tests__RUF023_RUF023.py.snap | 272 ++++++++++++++++++ 9 files changed, 843 insertions(+), 131 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py create mode 100644 crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs create mode 100644 crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs create mode 100644 crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py new file mode 100644 index 0000000000000..b848779e1b786 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py @@ -0,0 +1,141 @@ +######################### +# Single-line definitions +######################### + +class Klass: + __slots__ = ["d", "c", "b", "a"] # a comment that is untouched + __match_args__ = ("d", "c", "b", "a") + + # Quoting style is retained, + # but unnecessary parens are not + __slots__: set = {'b', "c", ((('a')))} + # Trailing commas are also not retained in the fix + __match_args__: tuple = ("b", "c", "a",) + +class Klass2: + if bool(): + __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} + else: + __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + + __match_args__: list[str] = ["the", "three", "little", "pigs"] + + __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + +################################################## +# Multiline definitions are flagged, but not fixed +################################################## + +class Klass3: + __slots__ = ( + "d0", + "c0", # a comment regarding 'c0' + "b0", + # a comment regarding 'a0': + "a0" + ) + + __match_args__ = [ + "d", + "c", # a comment regarding 'c' + "b", + # a comment regarding 'a': + "a" + ] + +class Klass4: + # we use natural sort, + # not alphabetical sort. + __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + + __match_args__ = ( + "look", + ( + "a_veeeeeeeeeeeeeeeeeeery_long_parenthesized_item" + ), + ) + + __slots__ = ( + "b", + (( + "c" + )), + "a" + ) + + __slots__ = ("don't" "care" "about", "__all__" "with", "concatenated" "strings") + +################################### +# These should all not get flagged: +################################### + +class Klass5: + __slots__ = () + __match_args__ = [] + __slots__ = ("single_item",) + __match_args__ = ( + "single_item_multiline", + ) + __slots__ = {"single_item",} + __slots__ = {"single_item_no_trailing_comma": "docs for that"} + __match_args__ = [ + "single_item_multiline_no_trailing_comma" + ] + __slots__ = ("not_a_tuple_just_a_string") + __slots__ = ["a", "b", "c", "d"] + __slots__ += ["e", "f", "g"] + __slots__ = ("a", "b", "c", "d") + + if bool(): + __slots__ += ("e", "f", "g") + else: + __slots__ += ["alpha", "omega"] + +__slots__ = ("b", "a", "e", "d") +__slots__ = ["b", "a", "e", "d"] +__match_args__ = ["foo", "bar", "antipasti"] + +class Klass6: + __slots__ = (9, 8, 7) + __match_args__ = ( # This is just an empty tuple, + # but, + # it's very well + ) # documented + + # We don't deduplicate elements; + # this just ensures that duplicate elements aren't unnecessarily + # reordered by an autofix: + __slots__ = ( + "duplicate_element", # comment1 + "duplicate_element", # comment3 + "duplicate_element", # comment2 + "duplicate_element", # comment0 + ) + + __slots__ =[ + [] + ] + __slots__ = [ + () + ] + __match_args__ = ( + () + ) + __match_args__ = ( + [] + ) + __slots__ = ( + (), + ) + __slots__ = ( + [], + ) + __match_args__ = ( + "foo", [], "bar" + ) + __match_args__ = [ + "foo", (), "bar" + ] + + __match_args__ = {"a", "set", "for", "__match_args__", "is invalid"} + __match_args__ = {"this": "is", "also": "invalid"} diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index c31f147997663..98992f86cf266 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -1458,6 +1458,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { if checker.settings.rules.enabled(Rule::UnsortedDunderAll) { ruff::rules::sort_dunder_all_assign(checker, assign); } + if checker.enabled(Rule::UnsortedDunderSlots) { + ruff::rules::sort_dunder_slots_assign(checker, assign); + } if checker.source_type.is_stub() { if checker.any_enabled(&[ Rule::UnprefixedTypeParam, @@ -1531,6 +1534,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { if checker.settings.rules.enabled(Rule::UnsortedDunderAll) { ruff::rules::sort_dunder_all_ann_assign(checker, assign_stmt); } + if checker.enabled(Rule::UnsortedDunderSlots) { + ruff::rules::sort_dunder_slots_ann_assign(checker, assign_stmt); + } if checker.source_type.is_stub() { if let Some(value) = value { if checker.enabled(Rule::AssignmentDefaultInStub) { diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index d073f0223f089..16775cf480ceb 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -925,6 +925,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Ruff, "020") => (RuleGroup::Preview, rules::ruff::rules::NeverUnion), (Ruff, "021") => (RuleGroup::Preview, rules::ruff::rules::ParenthesizeChainedOperators), (Ruff, "022") => (RuleGroup::Preview, rules::ruff::rules::UnsortedDunderAll), + (Ruff, "023") => (RuleGroup::Preview, rules::ruff::rules::UnsortedDunderSlots), (Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA), (Ruff, "200") => (RuleGroup::Stable, rules::ruff::rules::InvalidPyprojectToml), diff --git a/crates/ruff_linter/src/rules/ruff/mod.rs b/crates/ruff_linter/src/rules/ruff/mod.rs index d4eddf142ed95..b65b144248117 100644 --- a/crates/ruff_linter/src/rules/ruff/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/mod.rs @@ -43,6 +43,7 @@ mod tests { #[test_case(Rule::NeverUnion, Path::new("RUF020.py"))] #[test_case(Rule::ParenthesizeChainedOperators, Path::new("RUF021.py"))] #[test_case(Rule::UnsortedDunderAll, Path::new("RUF022.py"))] + #[test_case(Rule::UnsortedDunderSlots, Path::new("RUF023.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff_linter/src/rules/ruff/rules/mod.rs b/crates/ruff_linter/src/rules/ruff/rules/mod.rs index 07f55e184b1ad..ca37ee15e319d 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/mod.rs @@ -14,6 +14,7 @@ pub(crate) use pairwise_over_zipped::*; pub(crate) use parenthesize_logical_operators::*; pub(crate) use quadratic_list_summation::*; pub(crate) use sort_dunder_all::*; +pub(crate) use sort_dunder_slots::*; pub(crate) use static_key_dict_comprehension::*; pub(crate) use unnecessary_iterable_allocation_for_first_element::*; pub(crate) use unnecessary_key_check::*; @@ -36,6 +37,8 @@ mod never_union; mod pairwise_over_zipped; mod parenthesize_logical_operators; mod sort_dunder_all; +mod sort_dunder_slots; +mod sorting_helpers; mod static_key_dict_comprehension; mod unnecessary_iterable_allocation_for_first_element; mod unnecessary_key_check; diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 788b300dd56cc..ba59f7ec90fb8 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -12,8 +12,10 @@ use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::Checker; +use crate::rules::ruff::rules::sorting_helpers::{ + sort_single_line_elements_sequence, SequenceKind, SortClassification, +}; -use is_macro; use itertools::Itertools; use natord; @@ -157,22 +159,25 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) } let (elts, range, kind) = match node { - ast::Expr::List(ast::ExprList { elts, range, .. }) => (elts, *range, DunderAllKind::List), + ast::Expr::List(ast::ExprList { elts, range, .. }) => (elts, *range, SequenceKind::List), ast::Expr::Tuple(tuple_node @ ast::ExprTuple { elts, range, .. }) => { - (elts, *range, DunderAllKind::Tuple(tuple_node)) + (elts, *range, SequenceKind::Tuple(tuple_node)) } _ => return, }; + let elts = elts.iter().collect_vec(); - let elts_analysis = DunderAllSortClassification::from_elements(elts); + let elts_analysis = SortClassification::from_elements(&elts, |a, b| { + AllItemSortKey::from(a).cmp(&AllItemSortKey::from(b)) + }); if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { return; } let mut diagnostic = Diagnostic::new(UnsortedDunderAll, range); - if let DunderAllSortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { - if let Some(fix) = create_fix(range, elts, &items, &kind, checker) { + if let SortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { + if let Some(fix) = create_fix(range, &elts, &items, &kind, checker) { diagnostic.set_fix(fix); } } @@ -180,98 +185,6 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) checker.diagnostics.push(diagnostic); } -/// An enumeration of the two valid ways of defining -/// `__all__`: as a list, or as a tuple. -/// -/// Whereas lists are always parenthesized -/// (they always start with `[` and end with `]`), -/// single-line tuples *can* be unparenthesized. -/// We keep the original AST node around for the -/// Tuple variant so that this can be queried later. -#[derive(Debug)] -enum DunderAllKind<'a> { - List, - Tuple(&'a ast::ExprTuple), -} - -impl DunderAllKind<'_> { - fn is_parenthesized(&self, source: &str) -> bool { - match self { - Self::List => true, - Self::Tuple(ast_node) => ast_node.is_parenthesized(source), - } - } - - fn opening_token_for_multiline_definition(&self) -> Tok { - match self { - Self::List => Tok::Lsqb, - Self::Tuple(_) => Tok::Lpar, - } - } - - fn closing_token_for_multiline_definition(&self) -> Tok { - match self { - Self::List => Tok::Rsqb, - Self::Tuple(_) => Tok::Rpar, - } - } -} - -/// An enumeration of the possible conclusions we could come to -/// regarding the ordering of the elements in an `__all__` definition: -/// -/// 1. `__all__` is a list of string literals that is already sorted -/// 2. `__all__` is an unsorted list of string literals, -/// but we wouldn't be able to autofix it -/// 3. `__all__` is an unsorted list of string literals, -/// and it's possible we could generate a fix for it -/// 4. `__all__` contains one or more items that are not string -/// literals. -/// -/// ("Sorted" here means "ordered according to an isort-style sort". -/// See the module-level docs for a definition of "isort-style sort.") -#[derive(Debug, is_macro::Is)] -enum DunderAllSortClassification<'a> { - Sorted, - UnsortedButUnfixable, - UnsortedAndMaybeFixable { items: Vec<&'a str> }, - NotAListOfStringLiterals, -} - -impl<'a> DunderAllSortClassification<'a> { - fn from_elements(elements: &'a [ast::Expr]) -> Self { - let Some((first, rest @ [_, ..])) = elements.split_first() else { - return Self::Sorted; - }; - let Some(string_node) = first.as_string_literal_expr() else { - return Self::NotAListOfStringLiterals; - }; - let mut this = string_node.value.to_str(); - - for expr in rest { - let Some(string_node) = expr.as_string_literal_expr() else { - return Self::NotAListOfStringLiterals; - }; - let next = string_node.value.to_str(); - if AllItemSortKey::from(next) < AllItemSortKey::from(this) { - let mut items = Vec::with_capacity(elements.len()); - for expr in elements { - let Some(string_node) = expr.as_string_literal_expr() else { - return Self::NotAListOfStringLiterals; - }; - if string_node.value.is_implicit_concatenated() { - return Self::UnsortedButUnfixable; - } - items.push(string_node.value.to_str()); - } - return Self::UnsortedAndMaybeFixable { items }; - } - this = next; - } - Self::Sorted - } -} - /// A struct to implement logic necessary to achieve /// an "isort-style sort". /// @@ -361,9 +274,9 @@ impl InferredMemberType { /// is unfixable in this instance. fn create_fix( range: TextRange, - elts: &[ast::Expr], + elts: &[&ast::Expr], string_items: &[&str], - kind: &DunderAllKind, + kind: &SequenceKind, checker: &Checker, ) -> Option { let locator = checker.locator(); @@ -388,7 +301,9 @@ fn create_fix( assert_eq!(value.items.len(), elts.len()); value.into_sorted_source_code(locator, checker.stylist()) } else { - sort_single_line_dunder_all(elts, string_items, kind, locator) + sort_single_line_elements_sequence(kind, elts, string_items, locator, |a, b| { + AllItemSortKey::from(a).cmp(&AllItemSortKey::from(b)) + }) } }; @@ -413,7 +328,7 @@ impl MultilineDunderAllValue { /// if the analysis fails for whatever reason. fn from_source_range( range: TextRange, - kind: &DunderAllKind, + kind: &SequenceKind, locator: &Locator, ) -> Option { // Parse the multiline `__all__` definition using the raw tokens. @@ -546,7 +461,7 @@ impl Ranged for MultilineDunderAllValue { /// in the original source code. fn collect_dunder_all_lines( range: TextRange, - kind: &DunderAllKind, + kind: &SequenceKind, locator: &Locator, ) -> Option<(Vec, bool)> { // These first two variables are used for keeping track of state @@ -989,31 +904,3 @@ fn multiline_dunder_all_postlude<'a>( } Cow::Borrowed(postlude) } - -/// Create a string representing a fixed-up single-line -/// `__all__` definition, that can be inserted into the -/// source code as a `range_replacement` autofix. -fn sort_single_line_dunder_all( - elts: &[ast::Expr], - elements: &[&str], - kind: &DunderAllKind, - locator: &Locator, -) -> String { - // We grab the original source-code ranges using `locator.slice()` - // rather than using the expression generator, as this approach allows - // us to easily preserve stylistic choices in the original source code - // such as whether double or single quotes were used. - let mut element_pairs = elts.iter().zip(elements).collect_vec(); - element_pairs.sort_by_key(|(_, elem)| AllItemSortKey::from(**elem)); - let joined_items = element_pairs - .iter() - .map(|(elt, _)| locator.slice(elt)) - .join(", "); - match kind { - DunderAllKind::List => format!("[{joined_items}]"), - DunderAllKind::Tuple(_) if kind.is_parenthesized(locator.contents()) => { - format!("({joined_items})") - } - DunderAllKind::Tuple(_) => joined_items, - } -} diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs new file mode 100644 index 0000000000000..eb7ca52d5c95f --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -0,0 +1,199 @@ +use std::fmt::Display; + +use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast as ast; +use ruff_python_semantic::ScopeKind; + +use crate::checkers::ast::Checker; +use crate::rules::ruff::rules::sorting_helpers::{ + sort_single_line_elements_dict, sort_single_line_elements_sequence, DisplayKind, SequenceKind, + SortClassification, +}; + +use natord; + +/// ## What it does +/// Checks for `__slots__` and `__match_args__` +/// definitions that are not ordered according to a +/// [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order). +/// +/// ## Why is this bad? +/// Consistency is good. Use a common convention for +/// these special variables to make your code more +/// readable and idiomatic. +/// +/// ## Example +/// ```python +/// class Dog: +/// __slots__ = "name", "breed" +/// ``` +/// +/// Use instead: +/// ```python +/// class Dog: +/// __slots__ = "breed", "name" +/// ``` +#[violation] +pub struct UnsortedDunderSlots { + class_name: String, + class_variable: SpecialClassDunder, +} + +impl Violation for UnsortedDunderSlots { + const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes; + + #[derive_message_formats] + fn message(&self) -> String { + let UnsortedDunderSlots { + class_name, + class_variable, + } = self; + format!("`{class_name}.{class_variable}` is not sorted") + } + + fn fix_title(&self) -> Option { + let UnsortedDunderSlots { + class_name, + class_variable, + } = self; + Some(format!( + "Apply a natural sort to `{class_name}.{class_variable}`" + )) + } +} + +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +enum SpecialClassDunder { + Slots, + MatchArgs, +} + +impl Display for SpecialClassDunder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let string = match self { + Self::MatchArgs => "__match_args__", + Self::Slots => "__slots__", + }; + write!(f, "{string}") + } +} + +/// Sort a `__slots__`/`__match_args__` definition +/// represented by a `StmtAssign` AST node. +/// For example: `__slots__ = ["b", "c", "a"]`. +pub(crate) fn sort_dunder_slots_assign( + checker: &mut Checker, + ast::StmtAssign { value, targets, .. }: &ast::StmtAssign, +) { + if let [expr] = targets.as_slice() { + sort_dunder_slots(checker, expr, value); + } +} + +/// Sort a `__slots__`/`__match_args__` definition +/// represented by a `StmtAnnAssign` AST node. +/// For example: `__slots__: list[str] = ["b", "c", "a"]`. +pub(crate) fn sort_dunder_slots_ann_assign(checker: &mut Checker, node: &ast::StmtAnnAssign) { + if let Some(value) = &node.value { + sort_dunder_slots(checker, &node.target, value); + } +} + +/// Sort a tuple, list, dict or set that defines `__slots__` +/// or `__match_args__` in a class scope. +/// +/// This routine checks whether the display is sorted, and emits a +/// violation if it is not sorted. If the tuple/list/set was not sorted, +/// it attempts to set a `Fix` on the violation. +fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) { + let ast::Expr::Name(ast::ExprName { id, .. }) = target else { + return; + }; + + let dunder_kind = match id.as_str() { + "__slots__" => SpecialClassDunder::Slots, + "__match_args__" => SpecialClassDunder::MatchArgs, + _ => return, + }; + + // We're only interested in `__slots__`/`__match_args__` in the class scope + let ScopeKind::Class(ast::StmtClassDef { + name: class_name, .. + }) = checker.semantic().current_scope().kind + else { + return; + }; + + let (elts, range, display_kind) = match (dunder_kind, node) { + (_, ast::Expr::List(ast::ExprList { elts, range, .. })) => ( + elts.iter().collect(), + *range, + DisplayKind::Sequence(SequenceKind::List), + ), + (_, ast::Expr::Tuple(tuple_node @ ast::ExprTuple { elts, range, .. })) => { + let display_kind = DisplayKind::Sequence(SequenceKind::Tuple(tuple_node)); + (elts.iter().collect(), *range, display_kind) + } + (SpecialClassDunder::Slots, ast::Expr::Set(ast::ExprSet { elts, range })) => ( + elts.iter().collect(), + *range, + DisplayKind::Sequence(SequenceKind::Set), + ), + ( + SpecialClassDunder::Slots, + ast::Expr::Dict(ast::ExprDict { + keys, + values, + range, + }), + ) => { + let mut narrowed_keys = Vec::with_capacity(keys.len()); + for key in keys { + if let Some(key) = key { + narrowed_keys.push(key); + } else { + return; + } + } + assert_eq!(narrowed_keys.len(), values.len()); + (narrowed_keys, *range, DisplayKind::Dict { values }) + } + _ => return, + }; + + let elts_analysis = SortClassification::from_elements(&elts, natord::compare); + if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { + return; + } + + let mut diagnostic = Diagnostic::new( + UnsortedDunderSlots { + class_name: class_name.to_string(), + class_variable: dunder_kind, + }, + range, + ); + + if let SortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { + let locator = checker.locator(); + if !locator.contains_line_break(range) { + let new_var = match display_kind { + DisplayKind::Dict { values } => { + sort_single_line_elements_dict(&elts, &items, values, locator, natord::compare) + } + DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( + &sequence_kind, + &elts, + &items, + locator, + natord::compare, + ), + }; + let fix = Fix::safe_edit(Edit::range_replacement(new_var, range)); + diagnostic.set_fix(fix); + } + } + + checker.diagnostics.push(diagnostic); +} diff --git a/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs b/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs new file mode 100644 index 0000000000000..979393a08c527 --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs @@ -0,0 +1,202 @@ +/// Miscellaneous helpers for sorting constant lists of string literals. +/// +/// Examples where these are useful: +/// - Sorting `__all__` in the global scope, +/// - Sorting `__slots__` in a class scope +use std::cmp::Ordering; + +use ruff_python_ast as ast; +use ruff_python_parser::Tok; +use ruff_source_file::Locator; + +use is_macro; +use itertools::{izip, Itertools}; + +/// An enumeration of the various kinds of sequences for which Python has +/// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries). +/// +/// (I'm aware a set isn't actually a "sequence", +/// *but* for our purposes it's conceptually a sequence, +/// since in terms of the AST structure it's almost identical +/// to tuples/lists. Dicts are the "odd one out" here.) +#[derive(Debug)] +pub(crate) enum SequenceKind<'a> { + List, + Set, + Tuple(&'a ast::ExprTuple), +} + +impl SequenceKind<'_> { + fn surrounding_parens(&self, source: &str) -> (&str, &str) { + match self { + Self::List => ("[", "]"), + Self::Set => ("{", "}"), + Self::Tuple(ast_node) => { + if ast_node.is_parenthesized(source) { + ("(", ")") + } else { + ("", "") + } + } + } + } + + pub(crate) fn opening_token_for_multiline_definition(&self) -> Tok { + match self { + Self::List => Tok::Lsqb, + Self::Set => Tok::Lbrace, + Self::Tuple(_) => Tok::Lpar, + } + } + + pub(crate) fn closing_token_for_multiline_definition(&self) -> Tok { + match self { + Self::List => Tok::Rsqb, + Self::Set => Tok::Rbrace, + Self::Tuple(_) => Tok::Rpar, + } + } +} + +/// An enumeration of the various kinds of +/// +/// Python provides for builtin containers. +/// +/// Whereas lists, dicts and sets are always parenthesized +/// (e.g. lists always start with `[` and end with `]`), +/// single-line tuples *can* be unparenthesized. +/// We keep the original AST node around for the +/// Tuple variant so that this can be queried later. +#[derive(Debug, is_macro::Is)] +pub(crate) enum DisplayKind<'a> { + Sequence(SequenceKind<'a>), + Dict { values: &'a Vec }, +} + +/// Create a string representing a fixed-up single-line +/// definition of `__all__` or `__slots__` (etc.), +/// that can be inserted into the +/// source code as a `range_replacement` autofix. +pub(crate) fn sort_single_line_elements_sequence( + kind: &SequenceKind, + elts: &[&ast::Expr], + elements: &[&str], + locator: &Locator, + mut cmp_fn: F, +) -> String +where + F: FnMut(&str, &str) -> Ordering, +{ + assert_eq!(elts.len(), elements.len()); + let (opening_paren, closing_paren) = kind.surrounding_parens(locator.contents()); + let last_item_index = elements.len().saturating_sub(1); + let mut result = String::from(opening_paren); + + let mut element_pairs = elements.iter().zip(elts).collect_vec(); + element_pairs.sort_by(|(elem1, _), (elem2, _)| cmp_fn(elem1, elem2)); + // We grab the original source-code ranges using `locator.slice()` + // rather than using the expression generator, as this approach allows + // us to easily preserve stylistic choices in the original source code + // such as whether double or single quotes were used. + for (i, (_, elt)) in element_pairs.iter().enumerate() { + result.push_str(locator.slice(elt)); + if i < last_item_index { + result.push_str(", "); + } + } + + result.push_str(closing_paren); + result +} + +/// Create a string representing a fixed-up single-line +/// definition of `__all__` or `__slots__` (etc.), +/// that can be inserted into the +/// source code as a `range_replacement` autofix. +pub(crate) fn sort_single_line_elements_dict( + key_elts: &[&ast::Expr], + elements: &[&str], + value_elts: &[ast::Expr], + locator: &Locator, + mut cmp_fn: F, +) -> String +where + F: FnMut(&str, &str) -> Ordering, +{ + assert!(key_elts.len() == elements.len() && elements.len() == value_elts.len()); + let last_item_index = elements.len().saturating_sub(1); + let mut result = String::from('{'); + + let mut element_trios = izip!(elements, key_elts, value_elts).collect_vec(); + element_trios.sort_by(|(elem1, _, _), (elem2, _, _)| cmp_fn(elem1, elem2)); + // We grab the original source-code ranges using `locator.slice()` + // rather than using the expression generator, as this approach allows + // us to easily preserve stylistic choices in the original source code + // such as whether double or single quotes were used. + for (i, (_, key, value)) in element_trios.iter().enumerate() { + result.push_str(locator.slice(key)); + result.push_str(": "); + result.push_str(locator.slice(value)); + if i < last_item_index { + result.push_str(", "); + } + } + + result.push('}'); + result +} + +/// An enumeration of the possible conclusions we could come to +/// regarding the ordering of the elements in a display of string literals: +/// +/// 1. It's a display of string literals that is already sorted +/// 2. It's an unsorted display of string literals, +/// but we wouldn't be able to autofix it +/// 3. It's an unsorted display of string literals, +/// and it's possible we could generate a fix for it +/// 4. The display contains one or more items that are not string +/// literals. +#[derive(Debug, is_macro::Is)] +pub(crate) enum SortClassification<'a> { + Sorted, + UnsortedButUnfixable, + UnsortedAndMaybeFixable { items: Vec<&'a str> }, + NotAListOfStringLiterals, +} + +impl<'a> SortClassification<'a> { + pub(crate) fn from_elements(elements: &'a [&ast::Expr], mut cmp_fn: F) -> Self + where + F: FnMut(&str, &str) -> Ordering, + { + let Some((first, rest @ [_, ..])) = elements.split_first() else { + return Self::Sorted; + }; + let Some(string_node) = first.as_string_literal_expr() else { + return Self::NotAListOfStringLiterals; + }; + let mut this = string_node.value.to_str(); + + for expr in rest { + let Some(string_node) = expr.as_string_literal_expr() else { + return Self::NotAListOfStringLiterals; + }; + let next = string_node.value.to_str(); + if cmp_fn(next, this).is_lt() { + let mut items = Vec::with_capacity(elements.len()); + for expr in elements { + let Some(string_node) = expr.as_string_literal_expr() else { + return Self::NotAListOfStringLiterals; + }; + if string_node.value.is_implicit_concatenated() { + return Self::UnsortedButUnfixable; + } + items.push(string_node.value.to_str()); + } + return Self::UnsortedAndMaybeFixable { items }; + } + this = next; + } + Self::Sorted + } +} diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap new file mode 100644 index 0000000000000..4abba38e93994 --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap @@ -0,0 +1,272 @@ +--- +source: crates/ruff_linter/src/rules/ruff/mod.rs +--- +RUF023.py:6:17: RUF023 [*] `Klass.__slots__` is not sorted + | +5 | class Klass: +6 | __slots__ = ["d", "c", "b", "a"] # a comment that is untouched + | ^^^^^^^^^^^^^^^^^^^^ RUF023 +7 | __match_args__ = ("d", "c", "b", "a") + | + = help: Apply a natural sort to `Klass.__slots__` + +ℹ Safe fix +3 3 | ######################### +4 4 | +5 5 | class Klass: +6 |- __slots__ = ["d", "c", "b", "a"] # a comment that is untouched + 6 |+ __slots__ = ["a", "b", "c", "d"] # a comment that is untouched +7 7 | __match_args__ = ("d", "c", "b", "a") +8 8 | +9 9 | # Quoting style is retained, + +RUF023.py:7:22: RUF023 [*] `Klass.__match_args__` is not sorted + | +5 | class Klass: +6 | __slots__ = ["d", "c", "b", "a"] # a comment that is untouched +7 | __match_args__ = ("d", "c", "b", "a") + | ^^^^^^^^^^^^^^^^^^^^ RUF023 +8 | +9 | # Quoting style is retained, + | + = help: Apply a natural sort to `Klass.__match_args__` + +ℹ Safe fix +4 4 | +5 5 | class Klass: +6 6 | __slots__ = ["d", "c", "b", "a"] # a comment that is untouched +7 |- __match_args__ = ("d", "c", "b", "a") + 7 |+ __match_args__ = ("a", "b", "c", "d") +8 8 | +9 9 | # Quoting style is retained, +10 10 | # but unnecessary parens are not + +RUF023.py:11:22: RUF023 [*] `Klass.__slots__` is not sorted + | + 9 | # Quoting style is retained, +10 | # but unnecessary parens are not +11 | __slots__: set = {'b', "c", ((('a')))} + | ^^^^^^^^^^^^^^^^^^^^^ RUF023 +12 | # Trailing commas are also not retained in the fix +13 | __match_args__: tuple = ("b", "c", "a",) + | + = help: Apply a natural sort to `Klass.__slots__` + +ℹ Safe fix +8 8 | +9 9 | # Quoting style is retained, +10 10 | # but unnecessary parens are not +11 |- __slots__: set = {'b', "c", ((('a')))} + 11 |+ __slots__: set = {'a', 'b', "c"} +12 12 | # Trailing commas are also not retained in the fix +13 13 | __match_args__: tuple = ("b", "c", "a",) +14 14 | + +RUF023.py:13:29: RUF023 [*] `Klass.__match_args__` is not sorted + | +11 | __slots__: set = {'b', "c", ((('a')))} +12 | # Trailing commas are also not retained in the fix +13 | __match_args__: tuple = ("b", "c", "a",) + | ^^^^^^^^^^^^^^^^ RUF023 +14 | +15 | class Klass2: + | + = help: Apply a natural sort to `Klass.__match_args__` + +ℹ Safe fix +10 10 | # but unnecessary parens are not +11 11 | __slots__: set = {'b', "c", ((('a')))} +12 12 | # Trailing commas are also not retained in the fix +13 |- __match_args__: tuple = ("b", "c", "a",) + 13 |+ __match_args__: tuple = ("a", "b", "c") +14 14 | +15 15 | class Klass2: +16 16 | if bool(): + +RUF023.py:17:21: RUF023 [*] `Klass2.__slots__` is not sorted + | +15 | class Klass2: +16 | if bool(): +17 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +18 | else: +19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + | + = help: Apply a natural sort to `Klass2.__slots__` + +ℹ Safe fix +14 14 | +15 15 | class Klass2: +16 16 | if bool(): +17 |- __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} + 17 |+ __slots__ = {"a": "docs for a", "m": "docs for m", "x": "docs for x"} +18 18 | else: +19 19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +20 20 | + +RUF023.py:19:21: RUF023 [*] `Klass2.__slots__` is not sorted + | +17 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} +18 | else: +19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + | ^^^^^^^^^^^^^^^^^^^^^^ RUF023 +20 | +21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] + | + = help: Apply a natural sort to `Klass2.__slots__` + +ℹ Safe fix +16 16 | if bool(): +17 17 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} +18 18 | else: +19 |- __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + 19 |+ __slots__ = "foo1", "foo2", "foo3" # NB: an implicit tuple (without parens) +20 20 | +21 21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +22 22 | + +RUF023.py:21:33: RUF023 [*] `Klass2.__match_args__` is not sorted + | +19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +20 | +21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +22 | +23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + | + = help: Apply a natural sort to `Klass2.__match_args__` + +ℹ Safe fix +18 18 | else: +19 19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +20 20 | +21 |- __match_args__: list[str] = ["the", "three", "little", "pigs"] + 21 |+ __match_args__: list[str] = ["little", "pigs", "the", "three"] +22 22 | +23 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") +24 24 | + +RUF023.py:23:17: RUF023 [*] `Klass2.__slots__` is not sorted + | +21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +22 | +23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +24 | +25 | ################################################## + | + = help: Apply a natural sort to `Klass2.__slots__` + +ℹ Safe fix +20 20 | +21 21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +22 22 | +23 |- __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + 23 |+ __slots__ = "an_unparenthesized_tuple", "in", "parenthesized_item" +24 24 | +25 25 | ################################################## +26 26 | # Multiline definitions are flagged, but not fixed + +RUF023.py:30:17: RUF023 `Klass3.__slots__` is not sorted + | +29 | class Klass3: +30 | __slots__ = ( + | _________________^ +31 | | "d0", +32 | | "c0", # a comment regarding 'c0' +33 | | "b0", +34 | | # a comment regarding 'a0': +35 | | "a0" +36 | | ) + | |_____^ RUF023 +37 | +38 | __match_args__ = [ + | + = help: Apply a natural sort to `Klass3.__slots__` + +RUF023.py:38:22: RUF023 `Klass3.__match_args__` is not sorted + | +36 | ) +37 | +38 | __match_args__ = [ + | ______________________^ +39 | | "d", +40 | | "c", # a comment regarding 'c' +41 | | "b", +42 | | # a comment regarding 'a': +43 | | "a" +44 | | ] + | |_____^ RUF023 +45 | +46 | class Klass4: + | + = help: Apply a natural sort to `Klass3.__match_args__` + +RUF023.py:49:17: RUF023 [*] `Klass4.__slots__` is not sorted + | +47 | # we use natural sort, +48 | # not alphabetical sort. +49 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +50 | +51 | __match_args__ = ( + | + = help: Apply a natural sort to `Klass4.__slots__` + +ℹ Safe fix +46 46 | class Klass4: +47 47 | # we use natural sort, +48 48 | # not alphabetical sort. +49 |- __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + 49 |+ __slots__ = {"aadvark174", "aadvark237", "aadvark532", "aadvark10092"} +50 50 | +51 51 | __match_args__ = ( +52 52 | "look", + +RUF023.py:51:22: RUF023 `Klass4.__match_args__` is not sorted + | +49 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} +50 | +51 | __match_args__ = ( + | ______________________^ +52 | | "look", +53 | | ( +54 | | "a_veeeeeeeeeeeeeeeeeeery_long_parenthesized_item" +55 | | ), +56 | | ) + | |_____^ RUF023 +57 | +58 | __slots__ = ( + | + = help: Apply a natural sort to `Klass4.__match_args__` + +RUF023.py:58:17: RUF023 `Klass4.__slots__` is not sorted + | +56 | ) +57 | +58 | __slots__ = ( + | _________________^ +59 | | "b", +60 | | (( +61 | | "c" +62 | | )), +63 | | "a" +64 | | ) + | |_____^ RUF023 +65 | +66 | __slots__ = ("don't" "care" "about", "__all__" "with", "concatenated" "strings") + | + = help: Apply a natural sort to `Klass4.__slots__` + +RUF023.py:66:17: RUF023 `Klass4.__slots__` is not sorted + | +64 | ) +65 | +66 | __slots__ = ("don't" "care" "about", "__all__" "with", "concatenated" "strings") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +67 | +68 | ################################### + | + = help: Apply a natural sort to `Klass4.__slots__` + + From 785a002285c51db304baea065ff2ab24fdcdcc93 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 17 Jan 2024 14:49:05 +0000 Subject: [PATCH 02/21] Spruce up a little bit --- .../src/rules/ruff/rules/sort_dunder_slots.rs | 99 ++++++++++++------- .../src/rules/ruff/rules/sorting_helpers.rs | 18 ++-- 2 files changed, 72 insertions(+), 45 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index eb7ca52d5c95f..8ae295676cd0a 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -4,6 +4,8 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; use ruff_python_semantic::ScopeKind; +use ruff_source_file::Locator; +use ruff_text_size::TextRange; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sorting_helpers::{ @@ -63,6 +65,8 @@ impl Violation for UnsortedDunderSlots { } } +/// Enumeration of the two special class dunders +/// that we're interested in for this rule: `__match_args__` and `__slots__` #[derive(Debug, Eq, PartialEq, Copy, Clone)] enum SpecialClassDunder { Slots, @@ -125,7 +129,38 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr return; }; - let (elts, range, display_kind) = match (dunder_kind, node) { + let Some((elts, range, display_kind)) = extract_elts(dunder_kind, node) else { + return; + }; + + let elts_analysis = SortClassification::from_elements(&elts, natord::compare); + if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { + return; + } + + let mut diagnostic = Diagnostic::new( + UnsortedDunderSlots { + class_name: class_name.to_string(), + class_variable: dunder_kind, + }, + range, + ); + + if let SortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { + let locator = checker.locator(); + if !locator.contains_line_break(range) { + diagnostic.set_fix(create_fix(display_kind, &elts, &items, range, locator)); + } + } + + checker.diagnostics.push(diagnostic); +} + +fn extract_elts( + dunder_kind: SpecialClassDunder, + node: &ast::Expr, +) -> Option<(Vec<&ast::Expr>, TextRange, DisplayKind<'_>)> { + let result = match (dunder_kind, node) { (_, ast::Expr::List(ast::ExprList { elts, range, .. })) => ( elts.iter().collect(), *range, @@ -153,47 +188,39 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr if let Some(key) = key { narrowed_keys.push(key); } else { - return; + return None; } } + // If `None` was present in the keys, it indicates a "** splat", .e.g + // `__slots__ = {"foo": "bar", **other_dict}` + // If `None` wasn't present in the keys, + // the length of the keys should always equal the length of the values assert_eq!(narrowed_keys.len(), values.len()); (narrowed_keys, *range, DisplayKind::Dict { values }) } - _ => return, + _ => return None, }; + Some(result) +} - let elts_analysis = SortClassification::from_elements(&elts, natord::compare); - if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { - return; - } - - let mut diagnostic = Diagnostic::new( - UnsortedDunderSlots { - class_name: class_name.to_string(), - class_variable: dunder_kind, - }, - range, - ); - - if let SortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { - let locator = checker.locator(); - if !locator.contains_line_break(range) { - let new_var = match display_kind { - DisplayKind::Dict { values } => { - sort_single_line_elements_dict(&elts, &items, values, locator, natord::compare) - } - DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( - &sequence_kind, - &elts, - &items, - locator, - natord::compare, - ), - }; - let fix = Fix::safe_edit(Edit::range_replacement(new_var, range)); - diagnostic.set_fix(fix); +fn create_fix( + display_kind: DisplayKind<'_>, + elts: &[&ast::Expr], + items: &[&str], + range: TextRange, + locator: &Locator, +) -> Fix { + let new_var = match display_kind { + DisplayKind::Dict { values } => { + sort_single_line_elements_dict(elts, items, values, locator, natord::compare) } - } - - checker.diagnostics.push(diagnostic); + DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( + &sequence_kind, + elts, + items, + locator, + natord::compare, + ), + }; + Fix::safe_edit(Edit::range_replacement(new_var, range)) } diff --git a/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs b/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs index 979393a08c527..1f1b43bf8cb08 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs @@ -2,7 +2,7 @@ /// /// Examples where these are useful: /// - Sorting `__all__` in the global scope, -/// - Sorting `__slots__` in a class scope +/// - Sorting `__slots__` or `__match_args__` in a class scope use std::cmp::Ordering; use ruff_python_ast as ast; @@ -18,7 +18,13 @@ use itertools::{izip, Itertools}; /// (I'm aware a set isn't actually a "sequence", /// *but* for our purposes it's conceptually a sequence, /// since in terms of the AST structure it's almost identical -/// to tuples/lists. Dicts are the "odd one out" here.) +/// to tuples/lists.) +/// +/// Whereas lists, dicts and sets are always parenthesized +/// (e.g. lists always start with `[` and end with `]`), +/// single-line tuples *can* be unparenthesized. +/// We keep the original AST node around for the +/// Tuple variant so that this can be queried later. #[derive(Debug)] pub(crate) enum SequenceKind<'a> { List, @@ -59,14 +65,8 @@ impl SequenceKind<'_> { } /// An enumeration of the various kinds of -/// +/// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries) /// Python provides for builtin containers. -/// -/// Whereas lists, dicts and sets are always parenthesized -/// (e.g. lists always start with `[` and end with `]`), -/// single-line tuples *can* be unparenthesized. -/// We keep the original AST node around for the -/// Tuple variant so that this can be queried later. #[derive(Debug, is_macro::Is)] pub(crate) enum DisplayKind<'a> { Sequence(SequenceKind<'a>), From dfeb34152b66255238989f34609cd1b9487979fd Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 17 Jan 2024 15:16:02 +0000 Subject: [PATCH 03/21] regenerate schema --- ruff.schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/ruff.schema.json b/ruff.schema.json index ef61102357472..132019a37312c 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3447,6 +3447,7 @@ "RUF020", "RUF021", "RUF022", + "RUF023", "RUF1", "RUF10", "RUF100", From 39a059bc617307bc49e678e2551e15585d7ccfd8 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 17 Jan 2024 18:25:26 +0000 Subject: [PATCH 04/21] Respond to some Micha comments from Discord --- crates/ruff_linter/src/rules/ruff/rules/mod.rs | 2 +- ...{sorting_helpers.rs => sequence_sorting.rs} | 18 +++++++++--------- .../src/rules/ruff/rules/sort_dunder_all.rs | 2 +- .../src/rules/ruff/rules/sort_dunder_slots.rs | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) rename crates/ruff_linter/src/rules/ruff/rules/{sorting_helpers.rs => sequence_sorting.rs} (93%) diff --git a/crates/ruff_linter/src/rules/ruff/rules/mod.rs b/crates/ruff_linter/src/rules/ruff/rules/mod.rs index ca37ee15e319d..b4919df89f993 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/mod.rs @@ -36,9 +36,9 @@ mod mutable_dataclass_default; mod never_union; mod pairwise_over_zipped; mod parenthesize_logical_operators; +mod sequence_sorting; mod sort_dunder_all; mod sort_dunder_slots; -mod sorting_helpers; mod static_key_dict_comprehension; mod unnecessary_iterable_allocation_for_first_element; mod unnecessary_key_check; diff --git a/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs similarity index 93% rename from crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs rename to crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 1f1b43bf8cb08..250d9d9afc50c 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sorting_helpers.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -1,4 +1,4 @@ -/// Miscellaneous helpers for sorting constant lists of string literals. +/// Utilities for sorting constant lists of string literals. /// /// Examples where these are useful: /// - Sorting `__all__` in the global scope, @@ -26,7 +26,7 @@ use itertools::{izip, Itertools}; /// We keep the original AST node around for the /// Tuple variant so that this can be queried later. #[derive(Debug)] -pub(crate) enum SequenceKind<'a> { +pub(super) enum SequenceKind<'a> { List, Set, Tuple(&'a ast::ExprTuple), @@ -47,7 +47,7 @@ impl SequenceKind<'_> { } } - pub(crate) fn opening_token_for_multiline_definition(&self) -> Tok { + pub(super) fn opening_token_for_multiline_definition(&self) -> Tok { match self { Self::List => Tok::Lsqb, Self::Set => Tok::Lbrace, @@ -55,7 +55,7 @@ impl SequenceKind<'_> { } } - pub(crate) fn closing_token_for_multiline_definition(&self) -> Tok { + pub(super) fn closing_token_for_multiline_definition(&self) -> Tok { match self { Self::List => Tok::Rsqb, Self::Set => Tok::Rbrace, @@ -68,7 +68,7 @@ impl SequenceKind<'_> { /// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries) /// Python provides for builtin containers. #[derive(Debug, is_macro::Is)] -pub(crate) enum DisplayKind<'a> { +pub(super) enum DisplayKind<'a> { Sequence(SequenceKind<'a>), Dict { values: &'a Vec }, } @@ -77,7 +77,7 @@ pub(crate) enum DisplayKind<'a> { /// definition of `__all__` or `__slots__` (etc.), /// that can be inserted into the /// source code as a `range_replacement` autofix. -pub(crate) fn sort_single_line_elements_sequence( +pub(super) fn sort_single_line_elements_sequence( kind: &SequenceKind, elts: &[&ast::Expr], elements: &[&str], @@ -113,7 +113,7 @@ where /// definition of `__all__` or `__slots__` (etc.), /// that can be inserted into the /// source code as a `range_replacement` autofix. -pub(crate) fn sort_single_line_elements_dict( +pub(super) fn sort_single_line_elements_dict( key_elts: &[&ast::Expr], elements: &[&str], value_elts: &[ast::Expr], @@ -157,7 +157,7 @@ where /// 4. The display contains one or more items that are not string /// literals. #[derive(Debug, is_macro::Is)] -pub(crate) enum SortClassification<'a> { +pub(super) enum SortClassification<'a> { Sorted, UnsortedButUnfixable, UnsortedAndMaybeFixable { items: Vec<&'a str> }, @@ -165,7 +165,7 @@ pub(crate) enum SortClassification<'a> { } impl<'a> SortClassification<'a> { - pub(crate) fn from_elements(elements: &'a [&ast::Expr], mut cmp_fn: F) -> Self + pub(super) fn from_elements(elements: &'a [&ast::Expr], mut cmp_fn: F) -> Self where F: FnMut(&str, &str) -> Ordering, { diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index ba59f7ec90fb8..56230ecf62641 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -12,7 +12,7 @@ use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::Checker; -use crate::rules::ruff::rules::sorting_helpers::{ +use crate::rules::ruff::rules::sequence_sorting::{ sort_single_line_elements_sequence, SequenceKind, SortClassification, }; diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 8ae295676cd0a..4412c3f30296d 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -8,7 +8,7 @@ use ruff_source_file::Locator; use ruff_text_size::TextRange; use crate::checkers::ast::Checker; -use crate::rules::ruff::rules::sorting_helpers::{ +use crate::rules::ruff::rules::sequence_sorting::{ sort_single_line_elements_dict, sort_single_line_elements_sequence, DisplayKind, SequenceKind, SortClassification, }; From 37ee87954cf10e5af6a4a2dabcee1b157a0b8e4b Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 18 Jan 2024 16:26:08 +0000 Subject: [PATCH 05/21] Avoid allocations in the common case --- .../src/rules/ruff/rules/sequence_sorting.rs | 6 ++--- .../src/rules/ruff/rules/sort_dunder_all.rs | 8 +++--- .../src/rules/ruff/rules/sort_dunder_slots.rs | 25 +++++++++++++------ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 250d9d9afc50c..10fddf25cc949 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -79,7 +79,7 @@ pub(super) enum DisplayKind<'a> { /// source code as a `range_replacement` autofix. pub(super) fn sort_single_line_elements_sequence( kind: &SequenceKind, - elts: &[&ast::Expr], + elts: &[ast::Expr], elements: &[&str], locator: &Locator, mut cmp_fn: F, @@ -114,7 +114,7 @@ where /// that can be inserted into the /// source code as a `range_replacement` autofix. pub(super) fn sort_single_line_elements_dict( - key_elts: &[&ast::Expr], + key_elts: &[ast::Expr], elements: &[&str], value_elts: &[ast::Expr], locator: &Locator, @@ -165,7 +165,7 @@ pub(super) enum SortClassification<'a> { } impl<'a> SortClassification<'a> { - pub(super) fn from_elements(elements: &'a [&ast::Expr], mut cmp_fn: F) -> Self + pub(super) fn from_elements(elements: &'a [ast::Expr], mut cmp_fn: F) -> Self where F: FnMut(&str, &str) -> Ordering, { diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 56230ecf62641..dce8b790307f2 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -16,7 +16,6 @@ use crate::rules::ruff::rules::sequence_sorting::{ sort_single_line_elements_sequence, SequenceKind, SortClassification, }; -use itertools::Itertools; use natord; /// ## What it does @@ -165,9 +164,8 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) } _ => return, }; - let elts = elts.iter().collect_vec(); - let elts_analysis = SortClassification::from_elements(&elts, |a, b| { + let elts_analysis = SortClassification::from_elements(elts, |a, b| { AllItemSortKey::from(a).cmp(&AllItemSortKey::from(b)) }); if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { @@ -177,7 +175,7 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) let mut diagnostic = Diagnostic::new(UnsortedDunderAll, range); if let SortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { - if let Some(fix) = create_fix(range, &elts, &items, &kind, checker) { + if let Some(fix) = create_fix(range, elts, &items, &kind, checker) { diagnostic.set_fix(fix); } } @@ -274,7 +272,7 @@ impl InferredMemberType { /// is unfixable in this instance. fn create_fix( range: TextRange, - elts: &[&ast::Expr], + elts: &[ast::Expr], string_items: &[&str], kind: &SequenceKind, checker: &Checker, diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 4412c3f30296d..05c03e9b8e409 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fmt::Display; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; @@ -159,19 +160,19 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr fn extract_elts( dunder_kind: SpecialClassDunder, node: &ast::Expr, -) -> Option<(Vec<&ast::Expr>, TextRange, DisplayKind<'_>)> { +) -> Option<(Cow<'_, Vec>, TextRange, DisplayKind<'_>)> { let result = match (dunder_kind, node) { (_, ast::Expr::List(ast::ExprList { elts, range, .. })) => ( - elts.iter().collect(), + Cow::Borrowed(elts), *range, DisplayKind::Sequence(SequenceKind::List), ), (_, ast::Expr::Tuple(tuple_node @ ast::ExprTuple { elts, range, .. })) => { let display_kind = DisplayKind::Sequence(SequenceKind::Tuple(tuple_node)); - (elts.iter().collect(), *range, display_kind) + (Cow::Borrowed(elts), *range, display_kind) } (SpecialClassDunder::Slots, ast::Expr::Set(ast::ExprSet { elts, range })) => ( - elts.iter().collect(), + Cow::Borrowed(elts), *range, DisplayKind::Sequence(SequenceKind::Set), ), @@ -183,10 +184,14 @@ fn extract_elts( range, }), ) => { - let mut narrowed_keys = Vec::with_capacity(keys.len()); + let mut narrowed_keys = Vec::with_capacity(values.len()); for key in keys { if let Some(key) = key { - narrowed_keys.push(key); + // This is somewhat unfortunate, + // *but* only `__slots__` can be a dict out of + // `__all__`, `__slots__` and `__match_args__`, + // and even for `__slots__`, using a dict is very rare + narrowed_keys.push(key.to_owned()); } else { return None; } @@ -196,7 +201,11 @@ fn extract_elts( // If `None` wasn't present in the keys, // the length of the keys should always equal the length of the values assert_eq!(narrowed_keys.len(), values.len()); - (narrowed_keys, *range, DisplayKind::Dict { values }) + ( + Cow::Owned(narrowed_keys), + *range, + DisplayKind::Dict { values }, + ) } _ => return None, }; @@ -205,7 +214,7 @@ fn extract_elts( fn create_fix( display_kind: DisplayKind<'_>, - elts: &[&ast::Expr], + elts: &[ast::Expr], items: &[&str], range: TextRange, locator: &Locator, From 883b561d9eac0419377d5477c9156b78e223043d Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 18 Jan 2024 17:54:56 +0000 Subject: [PATCH 06/21] Also implement the fix for multiline `__slots__` and `__match_args__` --- .../resources/test/fixtures/ruff/RUF023.py | 115 +++- .../src/rules/ruff/rules/sequence_sorting.rs | 620 +++++++++++++++++- .../src/rules/ruff/rules/sort_dunder_all.rs | 620 +----------------- .../src/rules/ruff/rules/sort_dunder_slots.rs | 191 +++--- ..._rules__ruff__tests__RUF023_RUF023.py.snap | 570 +++++++++++----- 5 files changed, 1270 insertions(+), 846 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py index b848779e1b786..877de85ac5bb9 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py @@ -9,7 +9,8 @@ class Klass: # Quoting style is retained, # but unnecessary parens are not __slots__: set = {'b', "c", ((('a')))} - # Trailing commas are also not retained in the fix + # Trailing commas are also not retained for single-line definitions + # (but they are in multiline definitions) __match_args__: tuple = ("b", "c", "a",) class Klass2: @@ -19,12 +20,14 @@ class Klass2: __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) __match_args__: list[str] = ["the", "three", "little", "pigs"] - __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + # we use natural sort, + # not alphabetical sort or "isort-style" sort + __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} -################################################## -# Multiline definitions are flagged, but not fixed -################################################## +############################ +# Neat multiline definitions +############################ class Klass3: __slots__ = ( @@ -34,7 +37,6 @@ class Klass3: # a comment regarding 'a0': "a0" ) - __match_args__ = [ "d", "c", # a comment regarding 'c' @@ -43,18 +45,106 @@ class Klass3: "a" ] +########################################## +# Messier multiline __all__ definitions... +########################################## + class Klass4: - # we use natural sort, - # not alphabetical sort. - __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + # comment0 + __slots__ = ("d", "a", # comment1 + # comment2 + "f", "b", + "strangely", # comment3 + # comment4 + "formatted", + # comment5 + ) # comment6 + # comment7 + + __match_args__ = [ # comment0 + # comment1 + # comment2 + "dx", "cx", "bx", "ax" # comment3 + # comment4 + # comment5 + # comment6 + ] # comment7 + +# from cpython/Lib/pathlib/__init__.py +class PurePath: + __slots__ = ( + # The `_raw_paths` slot stores unnormalized string paths. This is set + # in the `__init__()` method. + '_raw_paths', + + # The `_drv`, `_root` and `_tail_cached` slots store parsed and + # normalized parts of the path. They are set when any of the `drive`, + # `root` or `_tail` properties are accessed for the first time. The + # three-part division corresponds to the result of + # `os.path.splitroot()`, except that the tail is further split on path + # separators (i.e. it is a list of strings), and that the root and + # tail are normalized. + '_drv', '_root', '_tail_cached', + + # The `_str` slot stores the string representation of the path, + # computed from the drive, root and tail when `__str__()` is called + # for the first time. It's used to implement `_str_normcase` + '_str', + + # The `_str_normcase_cached` slot stores the string path with + # normalized case. It is set when the `_str_normcase` property is + # accessed for the first time. It's used to implement `__eq__()` + # `__hash__()`, and `_parts_normcase` + '_str_normcase_cached', + + # The `_parts_normcase_cached` slot stores the case-normalized + # string path after splitting on path separators. It's set when the + # `_parts_normcase` property is accessed for the first time. It's used + # to implement comparison methods like `__lt__()`. + '_parts_normcase_cached', + + # The `_hash` slot stores the hash of the case-normalized string + # path. It's set when `__hash__()` is called for the first time. + '_hash', + ) + +# From cpython/Lib/pickletools.py +class ArgumentDescriptor(object): + __slots__ = ( + # name of descriptor record, also a module global name; a string + 'name', + # length of argument, in bytes; an int; UP_TO_NEWLINE and + # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length + # cases + 'n', + + # a function taking a file-like object, reading this kind of argument + # from the object at the current position, advancing the current + # position by n bytes, and returning the value of the argument + 'reader', + + # human-readable docs for this arg descriptor; a string + 'doc', + ) + +#################################### +# Should be flagged, but not fixed +#################################### + +# from cpython/Lib/test/test_inspect.py. +# Multiline dicts are out of scope. +class SlotUser: + __slots__ = {'power': 'measured in kilowatts', + 'distance': 'measured in kilometers'} + +class Klass5: __match_args__ = ( "look", ( "a_veeeeeeeeeeeeeeeeeeery_long_parenthesized_item" ), ) - __slots__ = ( "b", (( @@ -62,14 +152,13 @@ class Klass4: )), "a" ) - - __slots__ = ("don't" "care" "about", "__all__" "with", "concatenated" "strings") + __slots__ = ("don't" "care" "about", "__slots__" "with", "concatenated" "strings") ################################### # These should all not get flagged: ################################### -class Klass5: +class Klass6: __slots__ = () __match_args__ = [] __slots__ = ("single_item",) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 10fddf25cc949..fc7b1999203c8 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -3,11 +3,15 @@ /// Examples where these are useful: /// - Sorting `__all__` in the global scope, /// - Sorting `__slots__` or `__match_args__` in a class scope +use std::borrow::Cow; use std::cmp::Ordering; use ruff_python_ast as ast; -use ruff_python_parser::Tok; +use ruff_python_codegen::Stylist; +use ruff_python_parser::{lexer, Mode, Tok}; +use ruff_python_trivia::leading_indentation; use ruff_source_file::Locator; +use ruff_text_size::{Ranged, TextRange, TextSize}; use is_macro; use itertools::{izip, Itertools}; @@ -165,7 +169,7 @@ pub(super) enum SortClassification<'a> { } impl<'a> SortClassification<'a> { - pub(super) fn from_elements(elements: &'a [ast::Expr], mut cmp_fn: F) -> Self + pub(super) fn of_elements(elements: &'a [ast::Expr], mut cmp_fn: F) -> Self where F: FnMut(&str, &str) -> Ordering, { @@ -200,3 +204,615 @@ impl<'a> SortClassification<'a> { Self::Sorted } } + +// An instance of this struct encapsulates an analysis +/// of a multiline Python tuple/list that represents an +/// `__all__`/`__slots__`/etc. definition or augmentation. +pub(super) struct MultilineStringSequenceValue { + items: Vec, + range: TextRange, + ends_with_trailing_comma: bool, +} + +impl MultilineStringSequenceValue { + pub(super) fn len(&self) -> usize { + self.items.len() + } + + /// Analyse the source range for a multiline Python tuple/list that + /// represents an `__all__`/`__slots__`/etc. definition or augmentation. + /// Return `None` if the analysis fails for whatever reason. + pub(super) fn from_source_range( + range: TextRange, + kind: &SequenceKind, + locator: &Locator, + ) -> Option { + // Parse the multiline string sequence using the raw tokens. + // See the docs for `collect_string_sequence_lines()` for why we have to + // use the raw tokens, rather than just the AST, to do this parsing. + // + // Step (1). Start by collecting information on each line individually: + let (lines, ends_with_trailing_comma) = + collect_string_sequence_lines(range, kind, locator)?; + + // Step (2). Group lines together into sortable "items": + // - Any "item" contains a single element of the list/tuple + // - Assume that any comments on their own line are meant to be grouped + // with the element immediately below them: if the element moves, + // the comments above the element move with it. + // - The same goes for any comments on the same line as an element: + // if the element moves, the comment moves with it. + let items = collect_string_sequence_items(lines, range, locator); + + Some(MultilineStringSequenceValue { + items, + range, + ends_with_trailing_comma, + }) + } + + /// Sort a multiline sequence of literal strings + /// that is known to be unsorted. + /// + /// This function panics if it is called and `self.items` + /// has length < 2. It's redundant to call this method in this case, + /// since lists with < 2 items cannot be unsorted, + /// so this is a logic error. + pub(super) fn into_sorted_source_code( + mut self, + cmp_fn: F, + locator: &Locator, + stylist: &Stylist, + ) -> String + where + F: FnMut(&StringSequenceItem, &StringSequenceItem) -> Ordering, + { + let (first_item_start, last_item_end) = match self.items.as_slice() { + [first_item, .., last_item] => (first_item.start(), last_item.end()), + _ => panic!( + "We shouldn't be attempting an autofix if a sequence has < 2 elements; + a sequence with 1 or 0 elements cannot be unsorted." + ), + }; + + // As well as the "items" in a multiline string sequence, + // there is also a "prelude" and a "postlude": + // - Prelude == the region of source code from the opening parenthesis, + // up to the start of the first item in `__all__`/`__slots__`/etc. + // - Postlude == the region of source code from the end of the last + // item in `__all__`/`__slots__`/etc. up to and including the closing + // parenthesis. + // + // For example: + // + // ```python + // __all__ = [ # comment0 + // # comment1 + // "first item", + // "last item" # comment2 + // # comment3 + // ] # comment4 + // ``` + // + // - The prelude in the above example is the source code region + // starting just before the opening `[` and ending just after `# comment0`. + // `comment0` here counts as part of the prelude because it is on + // the same line as the opening paren, and because we haven't encountered + // any elements of `__all__` yet, but `comment1` counts as part of the first item, + // as it's on its own line, and all comments on their own line are grouped + // with the next element below them to make "items", + // (an "item" being a region of source code that all moves as one unit + // when `__all__` is sorted). + // - The postlude in the above example is the source code region starting + // just after `# comment2` and ending just after the closing paren. + // `# comment2` is part of the last item, as it's an inline comment on the + // same line as an element, but `# comment3` becomes part of the postlude + // because there are no items below it. `# comment4` is not part of the + // postlude: it's outside of the source-code range considered by this rule, + // and should therefore be untouched. + // + let newline = stylist.line_ending().as_str(); + let start_offset = self.start(); + let leading_indent = leading_indentation(locator.full_line(start_offset)); + let item_indent = format!("{}{}", leading_indent, stylist.indentation().as_str()); + + let prelude = + multiline_string_sequence_prelude(first_item_start, newline, start_offset, locator); + let postlude = multiline_string_sequence_postlude( + last_item_end, + newline, + leading_indent, + &item_indent, + self.end(), + locator, + ); + + self.items.sort_by(cmp_fn); + let joined_items = join_multiline_string_sequence_items( + &self.items, + locator, + &item_indent, + newline, + self.ends_with_trailing_comma, + ); + + format!("{prelude}{joined_items}{postlude}") + } +} + +impl Ranged for MultilineStringSequenceValue { + fn range(&self) -> TextRange { + self.range + } +} + +/// Collect data on each line of a multiline string sequence. +/// Return `None` if the sequence appears to be invalid, +/// or if it's an edge case we don't support. +/// +/// Why do we need to do this using the raw tokens, +/// when we already have the AST? The AST strips out +/// crucial information that we need to track here for +/// a multiline string sequence, such as: +/// - The value of comments +/// - The amount of whitespace between the end of a line +/// and an inline comment +/// - Whether or not the final item in the tuple/list has a +/// trailing comma +/// +/// All of this information is necessary to have at a later +/// stage if we're to sort items without doing unnecessary +/// brutality to the comments and pre-existing style choices +/// in the original source code. +fn collect_string_sequence_lines( + range: TextRange, + kind: &SequenceKind, + locator: &Locator, +) -> Option<(Vec, bool)> { + // These first two variables are used for keeping track of state + // regarding the entirety of the string sequence... + let mut ends_with_trailing_comma = false; + let mut lines = vec![]; + // ... all state regarding a single line of a string sequence + // is encapsulated in this variable + let mut line_state = LineState::default(); + + // `lex_starts_at()` gives us absolute ranges rather than relative ranges, + // but (surprisingly) we still need to pass in the slice of code we want it to lex, + // rather than the whole source file: + let mut token_iter = + lexer::lex_starts_at(locator.slice(range), Mode::Expression, range.start()); + let (first_tok, _) = token_iter.next()?.ok()?; + if first_tok != kind.opening_token_for_multiline_definition() { + return None; + } + let expected_final_token = kind.closing_token_for_multiline_definition(); + + for pair in token_iter { + let (tok, subrange) = pair.ok()?; + match tok { + Tok::NonLogicalNewline => { + lines.push(line_state.into_string_sequence_line()); + line_state = LineState::default(); + } + Tok::Comment(_) => { + line_state.visit_comment_token(subrange); + } + Tok::String { value, .. } => { + line_state.visit_string_token(value, subrange); + ends_with_trailing_comma = false; + } + Tok::Comma => { + line_state.visit_comma_token(subrange); + ends_with_trailing_comma = true; + } + tok if tok == expected_final_token => { + lines.push(line_state.into_string_sequence_line()); + break; + } + _ => return None, + } + } + Some((lines, ends_with_trailing_comma)) +} + +/// This struct is for keeping track of state +/// regarding a single line in a multiline string sequence +/// It is purely internal to `collect_string_sequence_lines()`, +/// and should not be used outside that function. +/// +/// There are three possible kinds of line in a multiline +/// string sequence, and we don't know what kind of a line +/// we're in until all tokens in that line have been processed: +/// +/// - A line with just a comment +/// (`StringSequenceLine::JustAComment)`) +/// - A line with one or more string items in it +/// (`StringSequenceLine::OneOrMoreItems`) +/// - An empty line (`StringSequenceLine::Empty`) +/// +/// As we process the tokens in a single line, +/// this struct accumulates the necessary state for us +/// to be able to determine what kind of a line we're in. +/// Once the entire line has been processed, +/// `into_string_sequence_line()` is called, which consumes +/// `self` and produces the classification for the line. +#[derive(Debug, Default)] +struct LineState { + first_item_in_line: Option<(String, TextRange)>, + following_items_in_line: Vec<(String, TextRange)>, + comment_range_start: Option, + comment_in_line: Option, +} + +impl LineState { + fn visit_string_token(&mut self, token_value: String, token_range: TextRange) { + if self.first_item_in_line.is_none() { + self.first_item_in_line = Some((token_value, token_range)); + } else { + self.following_items_in_line + .push((token_value, token_range)); + } + self.comment_range_start = Some(token_range.end()); + } + + fn visit_comma_token(&mut self, token_range: TextRange) { + self.comment_range_start = Some(token_range.end()); + } + + /// If this is a comment on its own line, + /// record the range of that comment. + /// + /// *If*, however, we've already seen a comma + /// or a string in this line, that means that we're + /// in a line with items. In that case, we want to + /// record the range of the comment, *plus* the whitespace + /// (if any) preceding the comment. This is so that we don't + /// unnecessarily apply opinionated formatting changes + /// where they might not be welcome. + fn visit_comment_token(&mut self, token_range: TextRange) { + self.comment_in_line = { + if let Some(comment_range_start) = self.comment_range_start { + Some(TextRange::new(comment_range_start, token_range.end())) + } else { + Some(token_range) + } + } + } + + fn into_string_sequence_line(self) -> StringSequenceLine { + if let Some(first_item) = self.first_item_in_line { + StringSequenceLine::OneOrMoreItems(LineWithItems { + first_item, + following_items: self.following_items_in_line, + trailing_comment_range: self.comment_in_line, + }) + } else { + self.comment_in_line + .map_or(StringSequenceLine::Empty, |comment_range| { + StringSequenceLine::JustAComment(LineWithJustAComment(comment_range)) + }) + } + } +} + +/// Instances of this struct represent source-code lines in the middle +/// of multiline tuples/lists/sets where the line contains +/// 0 elements of the sequence, but the line does have a comment in it. +#[derive(Debug)] +struct LineWithJustAComment(TextRange); + +/// Instances of this struct represent source-code lines in single-line +/// or multiline tuples/lists/sets where the line contains at least +/// 1 element of the sequence. The line may contain > 1 element of the +/// sequence, and may also have a trailing comment after the element(s). +#[derive(Debug)] +struct LineWithItems { + // For elements in the list, we keep track of the value of the + // value of the element as well as the source-code range of the element. + // (We need to know the actual value so that we can sort the items.) + first_item: (String, TextRange), + following_items: Vec<(String, TextRange)>, + // For comments, we only need to keep track of the source-code range. + trailing_comment_range: Option, +} + +impl LineWithItems { + fn num_items(&self) -> usize { + self.following_items.len() + 1 + } +} + +/// An enumeration of the possible kinds of source-code lines +/// that can exist in a multiline string sequence: +/// +/// - A line that has no string elements, but does have a comment. +/// - A line that has one or more string elements, +/// and may also have a trailing comment. +/// - An entirely empty line. +#[derive(Debug)] +enum StringSequenceLine { + JustAComment(LineWithJustAComment), + OneOrMoreItems(LineWithItems), + Empty, +} + +/// Given data on each line in a multiline string sequence, +/// group lines together into "items". +/// +/// Each item contains exactly one string element, +/// but might contain multiple comments attached to that element +/// that must move with the element when the string sequence is sorted. +/// +/// Note that any comments following the last item are discarded here, +/// but that doesn't matter: we add them back in `into_sorted_source_code()` +/// as part of the `postlude` (see comments in that function) +fn collect_string_sequence_items( + lines: Vec, + dunder_all_range: TextRange, + locator: &Locator, +) -> Vec { + let mut all_items = Vec::with_capacity(match lines.as_slice() { + [StringSequenceLine::OneOrMoreItems(single)] => single.num_items(), + _ => lines.len(), + }); + let mut first_item_encountered = false; + let mut preceding_comment_ranges = vec![]; + for line in lines { + match line { + StringSequenceLine::JustAComment(LineWithJustAComment(comment_range)) => { + // Comments on the same line as the opening paren and before any elements + // count as part of the "prelude"; these are not grouped into any item... + if first_item_encountered + || locator.line_start(comment_range.start()) + != locator.line_start(dunder_all_range.start()) + { + // ...but for all other comments that precede an element, + // group the comment with the element following that comment + // into an "item", so that the comment moves as one with the element + // when the list/tuple/set is sorted + preceding_comment_ranges.push(comment_range); + } + } + StringSequenceLine::OneOrMoreItems(LineWithItems { + first_item: (first_val, first_range), + following_items, + trailing_comment_range: comment_range, + }) => { + first_item_encountered = true; + all_items.push(StringSequenceItem::new( + first_val, + std::mem::take(&mut preceding_comment_ranges), + first_range, + comment_range, + )); + for (value, range) in following_items { + all_items.push(StringSequenceItem::with_no_comments(value, range)); + } + } + StringSequenceLine::Empty => continue, // discard empty lines + } + } + all_items +} + +/// An instance of this struct represents a single element +/// from a multiline string sequence, *and* any comments that +/// are "attached" to it. The comments "attached" to the element +/// will move with the element when the tuple/list/set is sorted. +/// +/// Comments on their own line immediately preceding the element will +/// always form a contiguous range with the range of the element itself; +/// however, inline comments won't necessary form a contiguous range. +/// Consider the following scenario, where both `# comment0` and `# comment1` +/// will move with the "a" element when the list is sorted: +/// +/// ```python +/// __all__ = [ +/// "b", +/// # comment0 +/// "a", "c", # comment1 +/// ] +/// ``` +/// +/// The desired outcome here is: +/// +/// ```python +/// __all__ = [ +/// # comment0 +/// "a", # comment1 +/// "b", +/// "c", +/// ] +/// ``` +/// +/// To achieve this, both `# comment0` and `# comment1` +/// are grouped into the `StringSequenceItem` instance +/// where the value is `"a"`, even though the source-code range +/// of `# comment1` does not form a contiguous range with the +/// source-code range of `"a"`. +#[derive(Debug)] +pub(super) struct StringSequenceItem { + value: String, + preceding_comment_ranges: Vec, + element_range: TextRange, + // total_range incorporates the ranges of preceding comments + // (which must be contiguous with the element), + // but doesn't incorporate any trailing comments + // (which might be contiguous, but also might not be) + total_range: TextRange, + end_of_line_comments: Option, +} + +impl StringSequenceItem { + pub(super) fn value(&self) -> &str { + &self.value + } + + fn new( + value: String, + preceding_comment_ranges: Vec, + element_range: TextRange, + end_of_line_comments: Option, + ) -> Self { + let total_range = { + if let Some(first_comment_range) = preceding_comment_ranges.first() { + TextRange::new(first_comment_range.start(), element_range.end()) + } else { + element_range + } + }; + Self { + value, + preceding_comment_ranges, + element_range, + total_range, + end_of_line_comments, + } + } + + fn with_no_comments(value: String, element_range: TextRange) -> Self { + Self::new(value, vec![], element_range, None) + } +} + +impl Ranged for StringSequenceItem { + fn range(&self) -> TextRange { + self.total_range + } +} + +/// Return a string representing the "prelude" for a +/// multiline string sequence. +/// +/// See inline comments in +/// `MultilineStringSequenceValue::into_sorted_source_code()` +/// for a definition of the term "prelude" in this context. +fn multiline_string_sequence_prelude<'a>( + first_item_start_offset: TextSize, + newline: &str, + dunder_all_offset: TextSize, + locator: &'a Locator, +) -> Cow<'a, str> { + let prelude_end = { + let first_item_line_offset = locator.line_start(first_item_start_offset); + if first_item_line_offset == locator.line_start(dunder_all_offset) { + first_item_start_offset + } else { + first_item_line_offset + } + }; + let prelude = locator.slice(TextRange::new(dunder_all_offset, prelude_end)); + if prelude.ends_with(['\r', '\n']) { + Cow::Borrowed(prelude) + } else { + Cow::Owned(format!("{}{}", prelude.trim_end(), newline)) + } +} + +/// Join the elements and comments of a multiline string sequence +/// definition into a single string. +/// +/// The resulting string does not include the "prelude" or +/// "postlude" of the tuple/set/list. +/// (See inline comments in +/// `MultilineStringSequence::into_sorted_source_code()` for +/// definitions of the terms "prelude" and "postlude" in this +/// context.) +fn join_multiline_string_sequence_items( + sorted_items: &[StringSequenceItem], + locator: &Locator, + item_indent: &str, + newline: &str, + needs_trailing_comma: bool, +) -> String { + let last_item_index = sorted_items.len() - 1; + + let mut new_dunder_all = String::new(); + for (i, item) in sorted_items.iter().enumerate() { + let is_final_item = i == last_item_index; + for comment_range in &item.preceding_comment_ranges { + new_dunder_all.push_str(item_indent); + new_dunder_all.push_str(locator.slice(comment_range)); + new_dunder_all.push_str(newline); + } + new_dunder_all.push_str(item_indent); + new_dunder_all.push_str(locator.slice(item.element_range)); + if !is_final_item || needs_trailing_comma { + new_dunder_all.push(','); + } + if let Some(trailing_comments) = item.end_of_line_comments { + new_dunder_all.push_str(locator.slice(trailing_comments)); + } + if !is_final_item { + new_dunder_all.push_str(newline); + } + } + new_dunder_all +} + +/// Return a string representing the "postlude" for a +/// multiline string sequence. +/// +/// See inline comments in +/// `MultilineStringSequence::into_sorted_source_code()` +/// for a definition of the term "postlude" in this context. +fn multiline_string_sequence_postlude<'a>( + last_item_end_offset: TextSize, + newline: &str, + leading_indent: &str, + item_indent: &str, + dunder_all_range_end: TextSize, + locator: &'a Locator, +) -> Cow<'a, str> { + let postlude_start = { + let last_item_line_offset = locator.line_end(last_item_end_offset); + if last_item_line_offset == locator.line_end(dunder_all_range_end) { + last_item_end_offset + } else { + last_item_line_offset + } + }; + let postlude = locator.slice(TextRange::new(postlude_start, dunder_all_range_end)); + + // The rest of this function uses heuristics to + // avoid very long indents for the closing paren + // that don't match the style for the rest of the + // fixed-up multiline string sequence. + // + // For example, we want to avoid something like this + // (not uncommon in code that hasn't been + // autoformatted)... + // + // ```python + // __all__ = ["xxxxxx", "yyyyyy", + // "aaaaaa", "bbbbbb", + // ] + // ``` + // + // ...getting autofixed to this: + // + // ```python + // __all__ = [ + // "a", + // "b", + // "x", + // "y", + // ] + // ``` + let newline_chars = ['\r', '\n']; + if !postlude.starts_with(newline_chars) { + return Cow::Borrowed(postlude); + } + if TextSize::of(leading_indentation( + postlude.trim_start_matches(newline_chars), + )) <= TextSize::of(item_indent) + { + return Cow::Borrowed(postlude); + } + let trimmed_postlude = postlude.trim_start(); + if trimmed_postlude.starts_with([']', ')']) { + return Cow::Owned(format!("{newline}{leading_indent}{trimmed_postlude}")); + } + Cow::Borrowed(postlude) +} diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index dce8b790307f2..3ccff475e7b74 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -1,19 +1,15 @@ -use std::borrow::Cow; use std::cmp::Ordering; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; -use ruff_python_codegen::Stylist; -use ruff_python_parser::{lexer, Mode, Tok}; use ruff_python_stdlib::str::is_cased_uppercase; -use ruff_python_trivia::leading_indentation; -use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextRange, TextSize}; +use ruff_text_size::TextRange; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ - sort_single_line_elements_sequence, SequenceKind, SortClassification, + sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind, + SortClassification, StringSequenceItem, }; use natord; @@ -165,7 +161,7 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) _ => return, }; - let elts_analysis = SortClassification::from_elements(elts, |a, b| { + let elts_analysis = SortClassification::of_elements(elts, |a, b| { AllItemSortKey::from(a).cmp(&AllItemSortKey::from(b)) }); if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { @@ -224,9 +220,9 @@ impl<'a> From<&'a str> for AllItemSortKey<'a> { } } -impl<'a> From<&'a DunderAllItem> for AllItemSortKey<'a> { - fn from(item: &'a DunderAllItem) -> Self { - Self::from(item.value.as_str()) +impl<'a> From<&'a StringSequenceItem> for AllItemSortKey<'a> { + fn from(item: &'a StringSequenceItem) -> Self { + Self::from(item.value()) } } @@ -295,9 +291,13 @@ fn create_fix( // bare minimum of token-processing for single-line `__all__` // definitions: if is_multiline { - let value = MultilineDunderAllValue::from_source_range(range, kind, locator)?; - assert_eq!(value.items.len(), elts.len()); - value.into_sorted_source_code(locator, checker.stylist()) + let value = MultilineStringSequenceValue::from_source_range(range, kind, locator)?; + assert_eq!(value.len(), elts.len()); + value.into_sorted_source_code( + |this, next| AllItemSortKey::from(this).cmp(&AllItemSortKey::from(next)), + locator, + checker.stylist(), + ) } else { sort_single_line_elements_sequence(kind, elts, string_items, locator, |a, b| { AllItemSortKey::from(a).cmp(&AllItemSortKey::from(b)) @@ -310,595 +310,3 @@ fn create_fix( range, ))) } - -/// An instance of this struct encapsulates an analysis -/// of a multiline Python tuple/list that represents an -/// `__all__` definition or augmentation. -struct MultilineDunderAllValue { - items: Vec, - range: TextRange, - ends_with_trailing_comma: bool, -} - -impl MultilineDunderAllValue { - /// Analyse the source range for a multiline Python tuple/list that - /// represents an `__all__` definition or augmentation. Return `None` - /// if the analysis fails for whatever reason. - fn from_source_range( - range: TextRange, - kind: &SequenceKind, - locator: &Locator, - ) -> Option { - // Parse the multiline `__all__` definition using the raw tokens. - // See the docs for `collect_dunder_all_lines()` for why we have to - // use the raw tokens, rather than just the AST, to do this parsing. - // - // Step (1). Start by collecting information on each line individually: - let (lines, ends_with_trailing_comma) = collect_dunder_all_lines(range, kind, locator)?; - - // Step (2). Group lines together into sortable "items": - // - Any "item" contains a single element of the `__all__` list/tuple - // - Assume that any comments on their own line are meant to be grouped - // with the element immediately below them: if the element moves, - // the comments above the element move with it. - // - The same goes for any comments on the same line as an element: - // if the element moves, the comment moves with it. - let items = collect_dunder_all_items(lines, range, locator); - - Some(MultilineDunderAllValue { - items, - range, - ends_with_trailing_comma, - }) - } - - /// Sort a multiline `__all__` definition - /// that is known to be unsorted. - /// - /// Panics if this is called and `self.items` - /// has length < 2. It's redundant to call this method in this case, - /// since lists with < 2 items cannot be unsorted, - /// so this is a logic error. - fn into_sorted_source_code(mut self, locator: &Locator, stylist: &Stylist) -> String { - let (first_item_start, last_item_end) = match self.items.as_slice() { - [first_item, .., last_item] => (first_item.start(), last_item.end()), - _ => panic!( - "We shouldn't be attempting an autofix if `__all__` has < 2 elements; - an `__all__` definition with 1 or 0 elements cannot be unsorted." - ), - }; - - // As well as the "items" in the `__all__` definition, - // there is also a "prelude" and a "postlude": - // - Prelude == the region of source code from the opening parenthesis, - // up to the start of the first item in `__all__`. - // - Postlude == the region of source code from the end of the last - // item in `__all__` up to and including the closing parenthesis. - // - // For example: - // - // ```python - // __all__ = [ # comment0 - // # comment1 - // "first item", - // "last item" # comment2 - // # comment3 - // ] # comment4 - // ``` - // - // - The prelude in the above example is the source code region - // starting just before the opening `[` and ending just after `# comment0`. - // `comment0` here counts as part of the prelude because it is on - // the same line as the opening paren, and because we haven't encountered - // any elements of `__all__` yet, but `comment1` counts as part of the first item, - // as it's on its own line, and all comments on their own line are grouped - // with the next element below them to make "items", - // (an "item" being a region of source code that all moves as one unit - // when `__all__` is sorted). - // - The postlude in the above example is the source code region starting - // just after `# comment2` and ending just after the closing paren. - // `# comment2` is part of the last item, as it's an inline comment on the - // same line as an element, but `# comment3` becomes part of the postlude - // because there are no items below it. `# comment4` is not part of the - // postlude: it's outside of the source-code range considered by this rule, - // and should therefore be untouched. - // - let newline = stylist.line_ending().as_str(); - let start_offset = self.start(); - let leading_indent = leading_indentation(locator.full_line(start_offset)); - let item_indent = format!("{}{}", leading_indent, stylist.indentation().as_str()); - - let prelude = - multiline_dunder_all_prelude(first_item_start, newline, start_offset, locator); - let postlude = multiline_dunder_all_postlude( - last_item_end, - newline, - leading_indent, - &item_indent, - self.end(), - locator, - ); - - self.items - .sort_by(|this, next| AllItemSortKey::from(this).cmp(&AllItemSortKey::from(next))); - let joined_items = join_multiline_dunder_all_items( - &self.items, - locator, - &item_indent, - newline, - self.ends_with_trailing_comma, - ); - - format!("{prelude}{joined_items}{postlude}") - } -} - -impl Ranged for MultilineDunderAllValue { - fn range(&self) -> TextRange { - self.range - } -} - -/// Collect data on each line of a multiline `__all__` definition. -/// Return `None` if `__all__` appears to be invalid, -/// or if it's an edge case we don't support. -/// -/// Why do we need to do this using the raw tokens, -/// when we already have the AST? The AST strips out -/// crucial information that we need to track here for -/// a multiline `__all__` definition, such as: -/// - The value of comments -/// - The amount of whitespace between the end of a line -/// and an inline comment -/// - Whether or not the final item in the tuple/list has a -/// trailing comma -/// -/// All of this information is necessary to have at a later -/// stage if we're to sort items without doing unnecessary -/// brutality to the comments and pre-existing style choices -/// in the original source code. -fn collect_dunder_all_lines( - range: TextRange, - kind: &SequenceKind, - locator: &Locator, -) -> Option<(Vec, bool)> { - // These first two variables are used for keeping track of state - // regarding the entirety of the `__all__` definition... - let mut ends_with_trailing_comma = false; - let mut lines = vec![]; - // ... all state regarding a single line of an `__all__` definition - // is encapsulated in this variable - let mut line_state = LineState::default(); - - // `lex_starts_at()` gives us absolute ranges rather than relative ranges, - // but (surprisingly) we still need to pass in the slice of code we want it to lex, - // rather than the whole source file: - let mut token_iter = - lexer::lex_starts_at(locator.slice(range), Mode::Expression, range.start()); - let (first_tok, _) = token_iter.next()?.ok()?; - if first_tok != kind.opening_token_for_multiline_definition() { - return None; - } - let expected_final_token = kind.closing_token_for_multiline_definition(); - - for pair in token_iter { - let (tok, subrange) = pair.ok()?; - match tok { - Tok::NonLogicalNewline => { - lines.push(line_state.into_dunder_all_line()); - line_state = LineState::default(); - } - Tok::Comment(_) => { - line_state.visit_comment_token(subrange); - } - Tok::String { value, .. } => { - line_state.visit_string_token(value, subrange); - ends_with_trailing_comma = false; - } - Tok::Comma => { - line_state.visit_comma_token(subrange); - ends_with_trailing_comma = true; - } - tok if tok == expected_final_token => { - lines.push(line_state.into_dunder_all_line()); - break; - } - _ => return None, - } - } - Some((lines, ends_with_trailing_comma)) -} - -/// This struct is for keeping track of state -/// regarding a single line in a multiline `__all__` definition. -/// It is purely internal to `collect_dunder_all_lines()`, -/// and should not be used outside that function. -/// -/// There are three possible kinds of line in a multiline -/// `__all__` definition, and we don't know what kind of a line -/// we're in until all tokens in that line have been processed: -/// -/// - A line with just a comment (`DunderAllLine::JustAComment)`) -/// - A line with one or more string items in it (`DunderAllLine::OneOrMoreItems`) -/// - An empty line (`DunderAllLine::Empty`) -/// -/// As we process the tokens in a single line, -/// this struct accumulates the necessary state for us -/// to be able to determine what kind of a line we're in. -/// Once the entire line has been processed, `into_dunder_all_line()` -/// is called, which consumes `self` and produces the -/// classification for the line. -#[derive(Debug, Default)] -struct LineState { - first_item_in_line: Option<(String, TextRange)>, - following_items_in_line: Vec<(String, TextRange)>, - comment_range_start: Option, - comment_in_line: Option, -} - -impl LineState { - fn visit_string_token(&mut self, token_value: String, token_range: TextRange) { - if self.first_item_in_line.is_none() { - self.first_item_in_line = Some((token_value, token_range)); - } else { - self.following_items_in_line - .push((token_value, token_range)); - } - self.comment_range_start = Some(token_range.end()); - } - - fn visit_comma_token(&mut self, token_range: TextRange) { - self.comment_range_start = Some(token_range.end()); - } - - /// If this is a comment on its own line, - /// record the range of that comment. - /// - /// *If*, however, we've already seen a comma - /// or a string in this line, that means that we're - /// in a line with items. In that case, we want to - /// record the range of the comment, *plus* the whitespace - /// (if any) preceding the comment. This is so that we don't - /// unnecessarily apply opinionated formatting changes - /// where they might not be welcome. - fn visit_comment_token(&mut self, token_range: TextRange) { - self.comment_in_line = { - if let Some(comment_range_start) = self.comment_range_start { - Some(TextRange::new(comment_range_start, token_range.end())) - } else { - Some(token_range) - } - } - } - - fn into_dunder_all_line(self) -> DunderAllLine { - if let Some(first_item) = self.first_item_in_line { - DunderAllLine::OneOrMoreItems(LineWithItems { - first_item, - following_items: self.following_items_in_line, - trailing_comment_range: self.comment_in_line, - }) - } else { - self.comment_in_line - .map_or(DunderAllLine::Empty, |comment_range| { - DunderAllLine::JustAComment(LineWithJustAComment(comment_range)) - }) - } - } -} - -/// Instances of this struct represent source-code lines in the middle -/// of multiline `__all__` tuples/lists where the line contains -/// 0 elements of the tuple/list, but the line does have a comment in it. -#[derive(Debug)] -struct LineWithJustAComment(TextRange); - -/// Instances of this struct represent source-code lines in single-line -/// or multiline `__all__` tuples/lists where the line contains at least -/// 1 element of the tuple/list. The line may contain > 1 element of the -/// tuple/list, and may also have a trailing comment after the element(s). -#[derive(Debug)] -struct LineWithItems { - // For elements in the list, we keep track of the value of the - // value of the element as well as the source-code range of the element. - // (We need to know the actual value so that we can sort the items.) - first_item: (String, TextRange), - following_items: Vec<(String, TextRange)>, - // For comments, we only need to keep track of the source-code range. - trailing_comment_range: Option, -} - -impl LineWithItems { - fn num_items(&self) -> usize { - self.following_items.len() + 1 - } -} - -/// An enumeration of the possible kinds of source-code lines -/// that can exist in a multiline `__all__` tuple or list: -/// -/// - A line that has no string elements, but does have a comment. -/// - A line that has one or more string elements, -/// and may also have a trailing comment. -/// - An entirely empty line. -#[derive(Debug)] -enum DunderAllLine { - JustAComment(LineWithJustAComment), - OneOrMoreItems(LineWithItems), - Empty, -} - -/// Given data on each line in a multiline `__all__` definition, -/// group lines together into "items". -/// -/// Each item contains exactly one string element, -/// but might contain multiple comments attached to that element -/// that must move with the element when `__all__` is sorted. -/// -/// Note that any comments following the last item are discarded here, -/// but that doesn't matter: we add them back in `into_sorted_source_code()` -/// as part of the `postlude` (see comments in that function) -fn collect_dunder_all_items( - lines: Vec, - dunder_all_range: TextRange, - locator: &Locator, -) -> Vec { - let mut all_items = Vec::with_capacity(match lines.as_slice() { - [DunderAllLine::OneOrMoreItems(single)] => single.num_items(), - _ => lines.len(), - }); - let mut first_item_encountered = false; - let mut preceding_comment_ranges = vec![]; - for line in lines { - match line { - DunderAllLine::JustAComment(LineWithJustAComment(comment_range)) => { - // Comments on the same line as the opening paren and before any elements - // count as part of the "prelude"; these are not grouped into any item... - if first_item_encountered - || locator.line_start(comment_range.start()) - != locator.line_start(dunder_all_range.start()) - { - // ...but for all other comments that precede an element, - // group the comment with the element following that comment - // into an "item", so that the comment moves as one with the element - // when the `__all__` list/tuple is sorted - preceding_comment_ranges.push(comment_range); - } - } - DunderAllLine::OneOrMoreItems(LineWithItems { - first_item: (first_val, first_range), - following_items, - trailing_comment_range: comment_range, - }) => { - first_item_encountered = true; - all_items.push(DunderAllItem::new( - first_val, - std::mem::take(&mut preceding_comment_ranges), - first_range, - comment_range, - )); - for (value, range) in following_items { - all_items.push(DunderAllItem::with_no_comments(value, range)); - } - } - DunderAllLine::Empty => continue, // discard empty lines - } - } - all_items -} - -/// An instance of this struct represents a single element -/// from a multiline `__all__` tuple/list, *and* any comments that -/// are "attached" to it. The comments "attached" to the element -/// will move with the element when the `__all__` tuple/list is sorted. -/// -/// Comments on their own line immediately preceding the element will -/// always form a contiguous range with the range of the element itself; -/// however, inline comments won't necessary form a contiguous range. -/// Consider the following scenario, where both `# comment0` and `# comment1` -/// will move with the "a" element when the list is sorted: -/// -/// ```python -/// __all__ = [ -/// "b", -/// # comment0 -/// "a", "c", # comment1 -/// ] -/// ``` -/// -/// The desired outcome here is: -/// -/// ```python -/// __all__ = [ -/// # comment0 -/// "a", # comment1 -/// "b", -/// "c", -/// ] -/// ``` -/// -/// To achieve this, both `# comment0` and `# comment1` -/// are grouped into the `DunderAllItem` instance -/// where the value is `"a"`, even though the source-code range -/// of `# comment1` does not form a contiguous range with the -/// source-code range of `"a"`. -#[derive(Debug)] -struct DunderAllItem { - value: String, - preceding_comment_ranges: Vec, - element_range: TextRange, - // total_range incorporates the ranges of preceding comments - // (which must be contiguous with the element), - // but doesn't incorporate any trailing comments - // (which might be contiguous, but also might not be) - total_range: TextRange, - end_of_line_comments: Option, -} - -impl DunderAllItem { - fn new( - value: String, - preceding_comment_ranges: Vec, - element_range: TextRange, - end_of_line_comments: Option, - ) -> Self { - let total_range = { - if let Some(first_comment_range) = preceding_comment_ranges.first() { - TextRange::new(first_comment_range.start(), element_range.end()) - } else { - element_range - } - }; - Self { - value, - preceding_comment_ranges, - element_range, - total_range, - end_of_line_comments, - } - } - - fn with_no_comments(value: String, element_range: TextRange) -> Self { - Self::new(value, vec![], element_range, None) - } -} - -impl Ranged for DunderAllItem { - fn range(&self) -> TextRange { - self.total_range - } -} - -/// Return a string representing the "prelude" for a -/// multiline `__all__` definition. -/// -/// See inline comments in -/// `MultilineDunderAllValue::into_sorted_source_code()` -/// for a definition of the term "prelude" in this context. -fn multiline_dunder_all_prelude<'a>( - first_item_start_offset: TextSize, - newline: &str, - dunder_all_offset: TextSize, - locator: &'a Locator, -) -> Cow<'a, str> { - let prelude_end = { - let first_item_line_offset = locator.line_start(first_item_start_offset); - if first_item_line_offset == locator.line_start(dunder_all_offset) { - first_item_start_offset - } else { - first_item_line_offset - } - }; - let prelude = locator.slice(TextRange::new(dunder_all_offset, prelude_end)); - if prelude.ends_with(['\r', '\n']) { - Cow::Borrowed(prelude) - } else { - Cow::Owned(format!("{}{}", prelude.trim_end(), newline)) - } -} - -/// Join the elements and comments of a multiline `__all__` -/// definition into a single string. -/// -/// The resulting string does not include the "prelude" or -/// "postlude" of the `__all__` definition. -/// (See inline comments in `MultilineDunderAllValue::into_sorted_source_code()` -/// for definitions of the terms "prelude" and "postlude" -/// in this context.) -fn join_multiline_dunder_all_items( - sorted_items: &[DunderAllItem], - locator: &Locator, - item_indent: &str, - newline: &str, - needs_trailing_comma: bool, -) -> String { - let last_item_index = sorted_items.len() - 1; - - let mut new_dunder_all = String::new(); - for (i, item) in sorted_items.iter().enumerate() { - let is_final_item = i == last_item_index; - for comment_range in &item.preceding_comment_ranges { - new_dunder_all.push_str(item_indent); - new_dunder_all.push_str(locator.slice(comment_range)); - new_dunder_all.push_str(newline); - } - new_dunder_all.push_str(item_indent); - new_dunder_all.push_str(locator.slice(item.element_range)); - if !is_final_item || needs_trailing_comma { - new_dunder_all.push(','); - } - if let Some(trailing_comments) = item.end_of_line_comments { - new_dunder_all.push_str(locator.slice(trailing_comments)); - } - if !is_final_item { - new_dunder_all.push_str(newline); - } - } - new_dunder_all -} - -/// Return a string representing the "postlude" for a -/// multiline `__all__` definition. -/// -/// See inline comments in -/// `MultilineDunderAllValue::into_sorted_source_code()` -/// for a definition of the term "postlude" in this context. -fn multiline_dunder_all_postlude<'a>( - last_item_end_offset: TextSize, - newline: &str, - leading_indent: &str, - item_indent: &str, - dunder_all_range_end: TextSize, - locator: &'a Locator, -) -> Cow<'a, str> { - let postlude_start = { - let last_item_line_offset = locator.line_end(last_item_end_offset); - if last_item_line_offset == locator.line_end(dunder_all_range_end) { - last_item_end_offset - } else { - last_item_line_offset - } - }; - let postlude = locator.slice(TextRange::new(postlude_start, dunder_all_range_end)); - - // The rest of this function uses heuristics to - // avoid very long indents for the closing paren - // that don't match the style for the rest of the - // new `__all__` definition. - // - // For example, we want to avoid something like this - // (not uncommon in code that hasn't been - // autoformatted)... - // - // ```python - // __all__ = ["xxxxxx", "yyyyyy", - // "aaaaaa", "bbbbbb", - // ] - // ``` - // - // ...getting autofixed to this: - // - // ```python - // __all__ = [ - // "a", - // "b", - // "x", - // "y", - // ] - // ``` - let newline_chars = ['\r', '\n']; - if !postlude.starts_with(newline_chars) { - return Cow::Borrowed(postlude); - } - if TextSize::of(leading_indentation( - postlude.trim_start_matches(newline_chars), - )) <= TextSize::of(item_indent) - { - return Cow::Borrowed(postlude); - } - let trimmed_postlude = postlude.trim_start(); - if trimmed_postlude.starts_with([']', ')']) { - return Cow::Owned(format!("{newline}{leading_indent}{trimmed_postlude}")); - } - Cow::Borrowed(postlude) -} diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 05c03e9b8e409..82047410e3c62 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -5,13 +5,12 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; use ruff_python_semantic::ScopeKind; -use ruff_source_file::Locator; use ruff_text_size::TextRange; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ - sort_single_line_elements_dict, sort_single_line_elements_sequence, DisplayKind, SequenceKind, - SortClassification, + sort_single_line_elements_dict, sort_single_line_elements_sequence, DisplayKind, + MultilineStringSequenceValue, SequenceKind, SortClassification, }; use natord; @@ -130,12 +129,12 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr return; }; - let Some((elts, range, display_kind)) = extract_elts(dunder_kind, node) else { + let Some(elts_analysis) = NodeAnalysis::new(node, dunder_kind) else { return; }; - let elts_analysis = SortClassification::from_elements(&elts, natord::compare); - if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { + let sort_classification = SortClassification::of_elements(&elts_analysis.elts, natord::compare); + if sort_classification.is_not_a_list_of_string_literals() || sort_classification.is_sorted() { return; } @@ -144,92 +143,130 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr class_name: class_name.to_string(), class_variable: dunder_kind, }, - range, + elts_analysis.range, ); - if let SortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { - let locator = checker.locator(); - if !locator.contains_line_break(range) { - diagnostic.set_fix(create_fix(display_kind, &elts, &items, range, locator)); + if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification { + if let Some(fix) = create_fix(&elts_analysis, &items, checker) { + diagnostic.set_fix(fix); } } checker.diagnostics.push(diagnostic); } -fn extract_elts( - dunder_kind: SpecialClassDunder, - node: &ast::Expr, -) -> Option<(Cow<'_, Vec>, TextRange, DisplayKind<'_>)> { - let result = match (dunder_kind, node) { - (_, ast::Expr::List(ast::ExprList { elts, range, .. })) => ( - Cow::Borrowed(elts), - *range, - DisplayKind::Sequence(SequenceKind::List), - ), - (_, ast::Expr::Tuple(tuple_node @ ast::ExprTuple { elts, range, .. })) => { - let display_kind = DisplayKind::Sequence(SequenceKind::Tuple(tuple_node)); - (Cow::Borrowed(elts), *range, display_kind) - } - (SpecialClassDunder::Slots, ast::Expr::Set(ast::ExprSet { elts, range })) => ( - Cow::Borrowed(elts), - *range, - DisplayKind::Sequence(SequenceKind::Set), - ), - ( - SpecialClassDunder::Slots, - ast::Expr::Dict(ast::ExprDict { - keys, - values, - range, - }), - ) => { - let mut narrowed_keys = Vec::with_capacity(values.len()); - for key in keys { - if let Some(key) = key { - // This is somewhat unfortunate, - // *but* only `__slots__` can be a dict out of - // `__all__`, `__slots__` and `__match_args__`, - // and even for `__slots__`, using a dict is very rare - narrowed_keys.push(key.to_owned()); - } else { - return None; +#[derive(Debug)] +struct NodeAnalysis<'a> { + elts: Cow<'a, Vec>, + range: TextRange, + display_kind: DisplayKind<'a>, +} + +impl<'a> NodeAnalysis<'a> { + fn new(node: &'a ast::Expr, dunder_kind: SpecialClassDunder) -> Option { + let result = match (dunder_kind, node) { + (_, ast::Expr::List(ast::ExprList { elts, range, .. })) => { + let display_kind = DisplayKind::Sequence(SequenceKind::List); + Self { + elts: Cow::Borrowed(elts), + range: *range, + display_kind, + } + } + (_, ast::Expr::Tuple(tuple_node @ ast::ExprTuple { elts, range, .. })) => { + let display_kind = DisplayKind::Sequence(SequenceKind::Tuple(tuple_node)); + Self { + elts: Cow::Borrowed(elts), + range: *range, + display_kind, + } + } + (SpecialClassDunder::Slots, ast::Expr::Set(ast::ExprSet { elts, range })) => { + let display_kind = DisplayKind::Sequence(SequenceKind::Set); + Self { + elts: Cow::Borrowed(elts), + range: *range, + display_kind, } } - // If `None` was present in the keys, it indicates a "** splat", .e.g - // `__slots__ = {"foo": "bar", **other_dict}` - // If `None` wasn't present in the keys, - // the length of the keys should always equal the length of the values - assert_eq!(narrowed_keys.len(), values.len()); ( - Cow::Owned(narrowed_keys), - *range, - DisplayKind::Dict { values }, - ) - } - _ => return None, - }; - Some(result) + SpecialClassDunder::Slots, + ast::Expr::Dict(ast::ExprDict { + keys, + values, + range, + }), + ) => { + let mut narrowed_keys = Vec::with_capacity(values.len()); + for key in keys { + if let Some(key) = key { + // This is somewhat unfortunate, + // *but* only `__slots__` can be a dict out of + // `__all__`, `__slots__` and `__match_args__`, + // and even for `__slots__`, using a dict is very rare + narrowed_keys.push(key.to_owned()); + } else { + return None; + } + } + // If `None` was present in the keys, it indicates a "** splat", .e.g + // `__slots__ = {"foo": "bar", **other_dict}` + // If `None` wasn't present in the keys, + // the length of the keys should always equal the length of the values + assert_eq!(narrowed_keys.len(), values.len()); + let display_kind = DisplayKind::Dict { values }; + Self { + elts: Cow::Owned(narrowed_keys), + range: *range, + display_kind, + } + } + _ => return None, + }; + Some(result) + } } fn create_fix( - display_kind: DisplayKind<'_>, - elts: &[ast::Expr], + NodeAnalysis { + elts, + range, + display_kind, + }: &NodeAnalysis, items: &[&str], - range: TextRange, - locator: &Locator, -) -> Fix { - let new_var = match display_kind { - DisplayKind::Dict { values } => { - sort_single_line_elements_dict(elts, items, values, locator, natord::compare) + checker: &Checker, +) -> Option { + let locator = checker.locator(); + let is_multiline = locator.contains_line_break(*range); + let sorted_source_code = { + if is_multiline { + // Sorting multiline dicts is unsupported + let display_kind = display_kind.as_sequence()?; + let analyzed_sequence = + MultilineStringSequenceValue::from_source_range(*range, display_kind, locator)?; + assert_eq!(analyzed_sequence.len(), elts.len()); + analyzed_sequence.into_sorted_source_code( + |this, next| natord::compare(this.value(), next.value()), + locator, + checker.stylist(), + ) + } else { + match display_kind { + DisplayKind::Dict { values } => { + sort_single_line_elements_dict(elts, items, values, locator, natord::compare) + } + DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( + sequence_kind, + elts, + items, + locator, + natord::compare, + ), + } } - DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( - &sequence_kind, - elts, - items, - locator, - natord::compare, - ), }; - Fix::safe_edit(Edit::range_replacement(new_var, range)) + Some(Fix::safe_edit(Edit::range_replacement( + sorted_source_code, + *range, + ))) } diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap index 4abba38e93994..3d6fa43fd1691 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap @@ -47,8 +47,8 @@ RUF023.py:11:22: RUF023 [*] `Klass.__slots__` is not sorted 10 | # but unnecessary parens are not 11 | __slots__: set = {'b', "c", ((('a')))} | ^^^^^^^^^^^^^^^^^^^^^ RUF023 -12 | # Trailing commas are also not retained in the fix -13 | __match_args__: tuple = ("b", "c", "a",) +12 | # Trailing commas are also not retained for single-line definitions +13 | # (but they are in multiline definitions) | = help: Apply a natural sort to `Klass.__slots__` @@ -58,215 +58,489 @@ RUF023.py:11:22: RUF023 [*] `Klass.__slots__` is not sorted 10 10 | # but unnecessary parens are not 11 |- __slots__: set = {'b', "c", ((('a')))} 11 |+ __slots__: set = {'a', 'b', "c"} -12 12 | # Trailing commas are also not retained in the fix -13 13 | __match_args__: tuple = ("b", "c", "a",) -14 14 | +12 12 | # Trailing commas are also not retained for single-line definitions +13 13 | # (but they are in multiline definitions) +14 14 | __match_args__: tuple = ("b", "c", "a",) -RUF023.py:13:29: RUF023 [*] `Klass.__match_args__` is not sorted +RUF023.py:14:29: RUF023 [*] `Klass.__match_args__` is not sorted | -11 | __slots__: set = {'b', "c", ((('a')))} -12 | # Trailing commas are also not retained in the fix -13 | __match_args__: tuple = ("b", "c", "a",) +12 | # Trailing commas are also not retained for single-line definitions +13 | # (but they are in multiline definitions) +14 | __match_args__: tuple = ("b", "c", "a",) | ^^^^^^^^^^^^^^^^ RUF023 -14 | -15 | class Klass2: +15 | +16 | class Klass2: | = help: Apply a natural sort to `Klass.__match_args__` ℹ Safe fix -10 10 | # but unnecessary parens are not 11 11 | __slots__: set = {'b', "c", ((('a')))} -12 12 | # Trailing commas are also not retained in the fix -13 |- __match_args__: tuple = ("b", "c", "a",) - 13 |+ __match_args__: tuple = ("a", "b", "c") -14 14 | -15 15 | class Klass2: -16 16 | if bool(): - -RUF023.py:17:21: RUF023 [*] `Klass2.__slots__` is not sorted +12 12 | # Trailing commas are also not retained for single-line definitions +13 13 | # (but they are in multiline definitions) +14 |- __match_args__: tuple = ("b", "c", "a",) + 14 |+ __match_args__: tuple = ("a", "b", "c") +15 15 | +16 16 | class Klass2: +17 17 | if bool(): + +RUF023.py:18:21: RUF023 [*] `Klass2.__slots__` is not sorted | -15 | class Klass2: -16 | if bool(): -17 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} +16 | class Klass2: +17 | if bool(): +18 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 -18 | else: -19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +19 | else: +20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) | = help: Apply a natural sort to `Klass2.__slots__` ℹ Safe fix -14 14 | -15 15 | class Klass2: -16 16 | if bool(): -17 |- __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} - 17 |+ __slots__ = {"a": "docs for a", "m": "docs for m", "x": "docs for x"} -18 18 | else: -19 19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) -20 20 | - -RUF023.py:19:21: RUF023 [*] `Klass2.__slots__` is not sorted +15 15 | +16 16 | class Klass2: +17 17 | if bool(): +18 |- __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} + 18 |+ __slots__ = {"a": "docs for a", "m": "docs for m", "x": "docs for x"} +19 19 | else: +20 20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 21 | + +RUF023.py:20:21: RUF023 [*] `Klass2.__slots__` is not sorted | -17 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} -18 | else: -19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +18 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} +19 | else: +20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) | ^^^^^^^^^^^^^^^^^^^^^^ RUF023 -20 | -21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +21 | +22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] | = help: Apply a natural sort to `Klass2.__slots__` ℹ Safe fix -16 16 | if bool(): -17 17 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} -18 18 | else: -19 |- __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) - 19 |+ __slots__ = "foo1", "foo2", "foo3" # NB: an implicit tuple (without parens) -20 20 | -21 21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] -22 22 | - -RUF023.py:21:33: RUF023 [*] `Klass2.__match_args__` is not sorted +17 17 | if bool(): +18 18 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} +19 19 | else: +20 |- __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + 20 |+ __slots__ = "foo1", "foo2", "foo3" # NB: an implicit tuple (without parens) +21 21 | +22 22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +23 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + +RUF023.py:22:33: RUF023 [*] `Klass2.__match_args__` is not sorted | -19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) -20 | -21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 | +22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 -22 | 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") +24 | # we use natural sort, | = help: Apply a natural sort to `Klass2.__match_args__` ℹ Safe fix -18 18 | else: -19 19 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) -20 20 | -21 |- __match_args__: list[str] = ["the", "three", "little", "pigs"] - 21 |+ __match_args__: list[str] = ["little", "pigs", "the", "three"] -22 22 | +19 19 | else: +20 20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 21 | +22 |- __match_args__: list[str] = ["the", "three", "little", "pigs"] + 22 |+ __match_args__: list[str] = ["little", "pigs", "the", "three"] 23 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") -24 24 | +24 24 | # we use natural sort, +25 25 | # not alphabetical sort or "isort-style" sort RUF023.py:23:17: RUF023 [*] `Klass2.__slots__` is not sorted | -21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] -22 | +22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 -24 | -25 | ################################################## +24 | # we use natural sort, +25 | # not alphabetical sort or "isort-style" sort | = help: Apply a natural sort to `Klass2.__slots__` ℹ Safe fix -20 20 | -21 21 | __match_args__: list[str] = ["the", "three", "little", "pigs"] -22 22 | +20 20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 21 | +22 22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] 23 |- __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") 23 |+ __slots__ = "an_unparenthesized_tuple", "in", "parenthesized_item" -24 24 | -25 25 | ################################################## -26 26 | # Multiline definitions are flagged, but not fixed +24 24 | # we use natural sort, +25 25 | # not alphabetical sort or "isort-style" sort +26 26 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + +RUF023.py:26:17: RUF023 [*] `Klass2.__slots__` is not sorted + | +24 | # we use natural sort, +25 | # not alphabetical sort or "isort-style" sort +26 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +27 | +28 | ############################ + | + = help: Apply a natural sort to `Klass2.__slots__` -RUF023.py:30:17: RUF023 `Klass3.__slots__` is not sorted +ℹ Safe fix +23 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") +24 24 | # we use natural sort, +25 25 | # not alphabetical sort or "isort-style" sort +26 |- __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + 26 |+ __slots__ = {"aadvark174", "aadvark237", "aadvark532", "aadvark10092"} +27 27 | +28 28 | ############################ +29 29 | # Neat multiline definitions + +RUF023.py:33:17: RUF023 [*] `Klass3.__slots__` is not sorted | -29 | class Klass3: -30 | __slots__ = ( +32 | class Klass3: +33 | __slots__ = ( | _________________^ -31 | | "d0", -32 | | "c0", # a comment regarding 'c0' -33 | | "b0", -34 | | # a comment regarding 'a0': -35 | | "a0" -36 | | ) +34 | | "d0", +35 | | "c0", # a comment regarding 'c0' +36 | | "b0", +37 | | # a comment regarding 'a0': +38 | | "a0" +39 | | ) | |_____^ RUF023 -37 | -38 | __match_args__ = [ +40 | __match_args__ = [ +41 | "d", | = help: Apply a natural sort to `Klass3.__slots__` -RUF023.py:38:22: RUF023 `Klass3.__match_args__` is not sorted +ℹ Safe fix +31 31 | +32 32 | class Klass3: +33 33 | __slots__ = ( +34 |- "d0", + 34 |+ # a comment regarding 'a0': + 35 |+ "a0", + 36 |+ "b0", +35 37 | "c0", # a comment regarding 'c0' +36 |- "b0", +37 |- # a comment regarding 'a0': +38 |- "a0" + 38 |+ "d0" +39 39 | ) +40 40 | __match_args__ = [ +41 41 | "d", + +RUF023.py:40:22: RUF023 [*] `Klass3.__match_args__` is not sorted | -36 | ) -37 | -38 | __match_args__ = [ +38 | "a0" +39 | ) +40 | __match_args__ = [ | ______________________^ -39 | | "d", -40 | | "c", # a comment regarding 'c' -41 | | "b", -42 | | # a comment regarding 'a': -43 | | "a" -44 | | ] +41 | | "d", +42 | | "c", # a comment regarding 'c' +43 | | "b", +44 | | # a comment regarding 'a': +45 | | "a" +46 | | ] | |_____^ RUF023 -45 | -46 | class Klass4: +47 | +48 | ########################################## | = help: Apply a natural sort to `Klass3.__match_args__` -RUF023.py:49:17: RUF023 [*] `Klass4.__slots__` is not sorted +ℹ Safe fix +38 38 | "a0" +39 39 | ) +40 40 | __match_args__ = [ +41 |- "d", + 41 |+ # a comment regarding 'a': + 42 |+ "a", + 43 |+ "b", +42 44 | "c", # a comment regarding 'c' +43 |- "b", +44 |- # a comment regarding 'a': +45 |- "a" + 45 |+ "d" +46 46 | ] +47 47 | +48 48 | ########################################## + +RUF023.py:54:17: RUF023 [*] `Klass4.__slots__` is not sorted | -47 | # we use natural sort, -48 | # not alphabetical sort. -49 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 -50 | -51 | __match_args__ = ( +52 | class Klass4: +53 | # comment0 +54 | __slots__ = ("d", "a", # comment1 + | _________________^ +55 | | # comment2 +56 | | "f", "b", +57 | | "strangely", # comment3 +58 | | # comment4 +59 | | "formatted", +60 | | # comment5 +61 | | ) # comment6 + | |_____^ RUF023 +62 | # comment7 | = help: Apply a natural sort to `Klass4.__slots__` ℹ Safe fix -46 46 | class Klass4: -47 47 | # we use natural sort, -48 48 | # not alphabetical sort. -49 |- __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} - 49 |+ __slots__ = {"aadvark174", "aadvark237", "aadvark532", "aadvark10092"} -50 50 | -51 51 | __match_args__ = ( -52 52 | "look", - -RUF023.py:51:22: RUF023 `Klass4.__match_args__` is not sorted +51 51 | +52 52 | class Klass4: +53 53 | # comment0 +54 |- __slots__ = ("d", "a", # comment1 +55 |- # comment2 +56 |- "f", "b", +57 |- "strangely", # comment3 +58 |- # comment4 + 54 |+ __slots__ = ( + 55 |+ "a", + 56 |+ "b", + 57 |+ "d", # comment1 + 58 |+ # comment2 + 59 |+ "f", + 60 |+ # comment4 +59 61 | "formatted", + 62 |+ "strangely", # comment3 +60 63 | # comment5 +61 64 | ) # comment6 +62 65 | # comment7 + +RUF023.py:64:22: RUF023 [*] `Klass4.__match_args__` is not sorted | -49 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} -50 | -51 | __match_args__ = ( +62 | # comment7 +63 | +64 | __match_args__ = [ # comment0 | ______________________^ -52 | | "look", -53 | | ( -54 | | "a_veeeeeeeeeeeeeeeeeeery_long_parenthesized_item" -55 | | ), -56 | | ) +65 | | # comment1 +66 | | # comment2 +67 | | "dx", "cx", "bx", "ax" # comment3 +68 | | # comment4 +69 | | # comment5 +70 | | # comment6 +71 | | ] # comment7 | |_____^ RUF023 -57 | -58 | __slots__ = ( +72 | +73 | # from cpython/Lib/pathlib/__init__.py | = help: Apply a natural sort to `Klass4.__match_args__` -RUF023.py:58:17: RUF023 `Klass4.__slots__` is not sorted - | -56 | ) -57 | -58 | __slots__ = ( - | _________________^ -59 | | "b", -60 | | (( -61 | | "c" -62 | | )), -63 | | "a" -64 | | ) - | |_____^ RUF023 -65 | -66 | __slots__ = ("don't" "care" "about", "__all__" "with", "concatenated" "strings") - | - = help: Apply a natural sort to `Klass4.__slots__` +ℹ Safe fix +62 62 | # comment7 +63 63 | +64 64 | __match_args__ = [ # comment0 + 65 |+ "ax", + 66 |+ "bx", + 67 |+ "cx", +65 68 | # comment1 +66 69 | # comment2 +67 |- "dx", "cx", "bx", "ax" # comment3 + 70 |+ "dx" # comment3 +68 71 | # comment4 +69 72 | # comment5 +70 73 | # comment6 -RUF023.py:66:17: RUF023 `Klass4.__slots__` is not sorted - | -64 | ) -65 | -66 | __slots__ = ("don't" "care" "about", "__all__" "with", "concatenated" "strings") - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 -67 | -68 | ################################### - | - = help: Apply a natural sort to `Klass4.__slots__` +RUF023.py:75:17: RUF023 [*] `PurePath.__slots__` is not sorted + | + 73 | # from cpython/Lib/pathlib/__init__.py + 74 | class PurePath: + 75 | __slots__ = ( + | _________________^ + 76 | | # The `_raw_paths` slot stores unnormalized string paths. This is set + 77 | | # in the `__init__()` method. + 78 | | '_raw_paths', + 79 | | + 80 | | # The `_drv`, `_root` and `_tail_cached` slots store parsed and + 81 | | # normalized parts of the path. They are set when any of the `drive`, + 82 | | # `root` or `_tail` properties are accessed for the first time. The + 83 | | # three-part division corresponds to the result of + 84 | | # `os.path.splitroot()`, except that the tail is further split on path + 85 | | # separators (i.e. it is a list of strings), and that the root and + 86 | | # tail are normalized. + 87 | | '_drv', '_root', '_tail_cached', + 88 | | + 89 | | # The `_str` slot stores the string representation of the path, + 90 | | # computed from the drive, root and tail when `__str__()` is called + 91 | | # for the first time. It's used to implement `_str_normcase` + 92 | | '_str', + 93 | | + 94 | | # The `_str_normcase_cached` slot stores the string path with + 95 | | # normalized case. It is set when the `_str_normcase` property is + 96 | | # accessed for the first time. It's used to implement `__eq__()` + 97 | | # `__hash__()`, and `_parts_normcase` + 98 | | '_str_normcase_cached', + 99 | | +100 | | # The `_parts_normcase_cached` slot stores the case-normalized +101 | | # string path after splitting on path separators. It's set when the +102 | | # `_parts_normcase` property is accessed for the first time. It's used +103 | | # to implement comparison methods like `__lt__()`. +104 | | '_parts_normcase_cached', +105 | | +106 | | # The `_hash` slot stores the hash of the case-normalized string +107 | | # path. It's set when `__hash__()` is called for the first time. +108 | | '_hash', +109 | | ) + | |_____^ RUF023 +110 | +111 | # From cpython/Lib/pickletools.py + | + = help: Apply a natural sort to `PurePath.__slots__` + +ℹ Safe fix +73 73 | # from cpython/Lib/pathlib/__init__.py +74 74 | class PurePath: +75 75 | __slots__ = ( +76 |- # The `_raw_paths` slot stores unnormalized string paths. This is set +77 |- # in the `__init__()` method. +78 |- '_raw_paths', +79 |- +80 76 | # The `_drv`, `_root` and `_tail_cached` slots store parsed and +81 77 | # normalized parts of the path. They are set when any of the `drive`, +82 78 | # `root` or `_tail` properties are accessed for the first time. The +-------------------------------------------------------------------------------- +84 80 | # `os.path.splitroot()`, except that the tail is further split on path +85 81 | # separators (i.e. it is a list of strings), and that the root and +86 82 | # tail are normalized. +87 |- '_drv', '_root', '_tail_cached', +88 |- + 83 |+ '_drv', + 84 |+ # The `_hash` slot stores the hash of the case-normalized string + 85 |+ # path. It's set when `__hash__()` is called for the first time. + 86 |+ '_hash', + 87 |+ # The `_parts_normcase_cached` slot stores the case-normalized + 88 |+ # string path after splitting on path separators. It's set when the + 89 |+ # `_parts_normcase` property is accessed for the first time. It's used + 90 |+ # to implement comparison methods like `__lt__()`. + 91 |+ '_parts_normcase_cached', + 92 |+ # The `_raw_paths` slot stores unnormalized string paths. This is set + 93 |+ # in the `__init__()` method. + 94 |+ '_raw_paths', + 95 |+ '_root', +89 96 | # The `_str` slot stores the string representation of the path, +90 97 | # computed from the drive, root and tail when `__str__()` is called +91 98 | # for the first time. It's used to implement `_str_normcase` +92 99 | '_str', +93 |- +94 100 | # The `_str_normcase_cached` slot stores the string path with +95 101 | # normalized case. It is set when the `_str_normcase` property is +96 102 | # accessed for the first time. It's used to implement `__eq__()` +97 103 | # `__hash__()`, and `_parts_normcase` +98 104 | '_str_normcase_cached', +99 |- +100 |- # The `_parts_normcase_cached` slot stores the case-normalized +101 |- # string path after splitting on path separators. It's set when the +102 |- # `_parts_normcase` property is accessed for the first time. It's used +103 |- # to implement comparison methods like `__lt__()`. +104 |- '_parts_normcase_cached', +105 |- +106 |- # The `_hash` slot stores the hash of the case-normalized string +107 |- # path. It's set when `__hash__()` is called for the first time. +108 |- '_hash', + 105 |+ '_tail_cached', +109 106 | ) +110 107 | +111 108 | # From cpython/Lib/pickletools.py + +RUF023.py:113:17: RUF023 [*] `ArgumentDescriptor.__slots__` is not sorted + | +111 | # From cpython/Lib/pickletools.py +112 | class ArgumentDescriptor(object): +113 | __slots__ = ( + | _________________^ +114 | | # name of descriptor record, also a module global name; a string +115 | | 'name', +116 | | +117 | | # length of argument, in bytes; an int; UP_TO_NEWLINE and +118 | | # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length +119 | | # cases +120 | | 'n', +121 | | +122 | | # a function taking a file-like object, reading this kind of argument +123 | | # from the object at the current position, advancing the current +124 | | # position by n bytes, and returning the value of the argument +125 | | 'reader', +126 | | +127 | | # human-readable docs for this arg descriptor; a string +128 | | 'doc', +129 | | ) + | |_____^ RUF023 +130 | +131 | #################################### + | + = help: Apply a natural sort to `ArgumentDescriptor.__slots__` + +ℹ Safe fix +111 111 | # From cpython/Lib/pickletools.py +112 112 | class ArgumentDescriptor(object): +113 113 | __slots__ = ( +114 |- # name of descriptor record, also a module global name; a string +115 |- 'name', +116 |- + 114 |+ # human-readable docs for this arg descriptor; a string + 115 |+ 'doc', +117 116 | # length of argument, in bytes; an int; UP_TO_NEWLINE and +118 117 | # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length +119 118 | # cases +120 119 | 'n', +121 |- + 120 |+ # name of descriptor record, also a module global name; a string + 121 |+ 'name', +122 122 | # a function taking a file-like object, reading this kind of argument +123 123 | # from the object at the current position, advancing the current +124 124 | # position by n bytes, and returning the value of the argument +125 125 | 'reader', +126 |- +127 |- # human-readable docs for this arg descriptor; a string +128 |- 'doc', +129 126 | ) +130 127 | +131 128 | #################################### + +RUF023.py:138:17: RUF023 `SlotUser.__slots__` is not sorted + | +136 | # Multiline dicts are out of scope. +137 | class SlotUser: +138 | __slots__ = {'power': 'measured in kilowatts', + | _________________^ +139 | | 'distance': 'measured in kilometers'} + | |______________________________________________________^ RUF023 +140 | +141 | class Klass5: + | + = help: Apply a natural sort to `SlotUser.__slots__` + +RUF023.py:142:22: RUF023 `Klass5.__match_args__` is not sorted + | +141 | class Klass5: +142 | __match_args__ = ( + | ______________________^ +143 | | "look", +144 | | ( +145 | | "a_veeeeeeeeeeeeeeeeeeery_long_parenthesized_item" +146 | | ), +147 | | ) + | |_____^ RUF023 +148 | __slots__ = ( +149 | "b", + | + = help: Apply a natural sort to `Klass5.__match_args__` + +RUF023.py:148:17: RUF023 `Klass5.__slots__` is not sorted + | +146 | ), +147 | ) +148 | __slots__ = ( + | _________________^ +149 | | "b", +150 | | (( +151 | | "c" +152 | | )), +153 | | "a" +154 | | ) + | |_____^ RUF023 +155 | __slots__ = ("don't" "care" "about", "__slots__" "with", "concatenated" "strings") + | + = help: Apply a natural sort to `Klass5.__slots__` + +RUF023.py:155:17: RUF023 `Klass5.__slots__` is not sorted + | +153 | "a" +154 | ) +155 | __slots__ = ("don't" "care" "about", "__slots__" "with", "concatenated" "strings") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +156 | +157 | ################################### + | + = help: Apply a natural sort to `Klass5.__slots__` From 270432fe18ae88e2334f3d7d3ef9a88d6dda63a8 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 18 Jan 2024 18:22:21 +0000 Subject: [PATCH 07/21] remove some unnecessary uses of `pub` --- crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index fc7b1999203c8..cb188b432100b 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -51,7 +51,7 @@ impl SequenceKind<'_> { } } - pub(super) fn opening_token_for_multiline_definition(&self) -> Tok { + fn opening_token_for_multiline_definition(&self) -> Tok { match self { Self::List => Tok::Lsqb, Self::Set => Tok::Lbrace, @@ -59,7 +59,7 @@ impl SequenceKind<'_> { } } - pub(super) fn closing_token_for_multiline_definition(&self) -> Tok { + fn closing_token_for_multiline_definition(&self) -> Tok { match self { Self::List => Tok::Rsqb, Self::Set => Tok::Rbrace, From 6c944465017e5a736971f496e17a8034d3d9484d Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 19 Jan 2024 17:21:42 +0000 Subject: [PATCH 08/21] use static lifetimes Co-authored-by: Micha Reiser --- crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index cb188b432100b..6b3274c2c605b 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -37,7 +37,7 @@ pub(super) enum SequenceKind<'a> { } impl SequenceKind<'_> { - fn surrounding_parens(&self, source: &str) -> (&str, &str) { + fn surrounding_parens(&self, source: &str) -> (&'static str, &'static str) { match self { Self::List => ("[", "]"), Self::Set => ("{", "}"), From 90c413b7e08a3ab34442116d227f54ce95cc3eea Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 19 Jan 2024 18:24:53 +0000 Subject: [PATCH 09/21] Address a bunch of Micha comments --- .../src/rules/ruff/rules/sequence_sorting.rs | 78 +++++-------------- .../src/rules/ruff/rules/sort_dunder_slots.rs | 41 +++++++++- 2 files changed, 58 insertions(+), 61 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 6b3274c2c605b..4d6c62b61b7b5 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -8,13 +8,13 @@ use std::cmp::Ordering; use ruff_python_ast as ast; use ruff_python_codegen::Stylist; -use ruff_python_parser::{lexer, Mode, Tok}; +use ruff_python_parser::{lexer, Mode, Tok, TokenKind}; use ruff_python_trivia::leading_indentation; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use is_macro; -use itertools::{izip, Itertools}; +use itertools::Itertools; /// An enumeration of the various kinds of sequences for which Python has /// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries). @@ -51,19 +51,19 @@ impl SequenceKind<'_> { } } - fn opening_token_for_multiline_definition(&self) -> Tok { + const fn opening_token_for_multiline_definition(&self) -> TokenKind { match self { - Self::List => Tok::Lsqb, - Self::Set => Tok::Lbrace, - Self::Tuple(_) => Tok::Lpar, + Self::List => TokenKind::Lsqb, + Self::Set => TokenKind::Lbrace, + Self::Tuple(_) => TokenKind::Lpar, } } - fn closing_token_for_multiline_definition(&self) -> Tok { + const fn closing_token_for_multiline_definition(&self) -> TokenKind { match self { - Self::List => Tok::Rsqb, - Self::Set => Tok::Rbrace, - Self::Tuple(_) => Tok::Rpar, + Self::List => TokenKind::Rsqb, + Self::Set => TokenKind::Rbrace, + Self::Tuple(_) => TokenKind::Rpar, } } } @@ -113,58 +113,20 @@ where result } -/// Create a string representing a fixed-up single-line -/// definition of `__all__` or `__slots__` (etc.), -/// that can be inserted into the -/// source code as a `range_replacement` autofix. -pub(super) fn sort_single_line_elements_dict( - key_elts: &[ast::Expr], - elements: &[&str], - value_elts: &[ast::Expr], - locator: &Locator, - mut cmp_fn: F, -) -> String -where - F: FnMut(&str, &str) -> Ordering, -{ - assert!(key_elts.len() == elements.len() && elements.len() == value_elts.len()); - let last_item_index = elements.len().saturating_sub(1); - let mut result = String::from('{'); - - let mut element_trios = izip!(elements, key_elts, value_elts).collect_vec(); - element_trios.sort_by(|(elem1, _, _), (elem2, _, _)| cmp_fn(elem1, elem2)); - // We grab the original source-code ranges using `locator.slice()` - // rather than using the expression generator, as this approach allows - // us to easily preserve stylistic choices in the original source code - // such as whether double or single quotes were used. - for (i, (_, key, value)) in element_trios.iter().enumerate() { - result.push_str(locator.slice(key)); - result.push_str(": "); - result.push_str(locator.slice(value)); - if i < last_item_index { - result.push_str(", "); - } - } - - result.push('}'); - result -} - /// An enumeration of the possible conclusions we could come to -/// regarding the ordering of the elements in a display of string literals: -/// -/// 1. It's a display of string literals that is already sorted -/// 2. It's an unsorted display of string literals, -/// but we wouldn't be able to autofix it -/// 3. It's an unsorted display of string literals, -/// and it's possible we could generate a fix for it -/// 4. The display contains one or more items that are not string -/// literals. +/// regarding the ordering of the elements in a display of string literals #[derive(Debug, is_macro::Is)] pub(super) enum SortClassification<'a> { + /// It's a display of string literals that is already sorted Sorted, + /// It's an unsorted display of string literals, + /// but we wouldn't be able to autofix it UnsortedButUnfixable, + /// It's an unsorted display of string literals, + /// and it's possible we could generate a fix for it UnsortedAndMaybeFixable { items: Vec<&'a str> }, + /// The display contains one or more items that are not string + /// literals. NotAListOfStringLiterals, } @@ -383,7 +345,7 @@ fn collect_string_sequence_lines( let mut token_iter = lexer::lex_starts_at(locator.slice(range), Mode::Expression, range.start()); let (first_tok, _) = token_iter.next()?.ok()?; - if first_tok != kind.opening_token_for_multiline_definition() { + if TokenKind::from(&first_tok) != kind.opening_token_for_multiline_definition() { return None; } let expected_final_token = kind.closing_token_for_multiline_definition(); @@ -406,7 +368,7 @@ fn collect_string_sequence_lines( line_state.visit_comma_token(subrange); ends_with_trailing_comma = true; } - tok if tok == expected_final_token => { + tok if TokenKind::from(&tok) == expected_final_token => { lines.push(line_state.into_string_sequence_line()); break; } diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 82047410e3c62..21bf48a3edf45 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -5,14 +5,16 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; use ruff_python_semantic::ScopeKind; +use ruff_source_file::Locator; use ruff_text_size::TextRange; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ - sort_single_line_elements_dict, sort_single_line_elements_sequence, DisplayKind, - MultilineStringSequenceValue, SequenceKind, SortClassification, + sort_single_line_elements_sequence, DisplayKind, MultilineStringSequenceValue, SequenceKind, + SortClassification, }; +use itertools::{izip, Itertools}; use natord; /// ## What it does @@ -253,7 +255,7 @@ fn create_fix( } else { match display_kind { DisplayKind::Dict { values } => { - sort_single_line_elements_dict(elts, items, values, locator, natord::compare) + sort_single_line_elements_dict(elts, items, values, locator) } DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( sequence_kind, @@ -270,3 +272,36 @@ fn create_fix( *range, ))) } + +/// Create a string representing a fixed-up single-line +/// definition of `__all__` or `__slots__` (etc.), +/// that can be inserted into the +/// source code as a `range_replacement` autofix. +pub(super) fn sort_single_line_elements_dict( + key_elts: &[ast::Expr], + elements: &[&str], + value_elts: &[ast::Expr], + locator: &Locator, +) -> String { + assert!(key_elts.len() == elements.len() && elements.len() == value_elts.len()); + let last_item_index = elements.len().saturating_sub(1); + let mut result = String::from('{'); + + let mut element_trios = izip!(elements, key_elts, value_elts).collect_vec(); + element_trios.sort_by(|(elem1, _, _), (elem2, _, _)| natord::compare(elem1, elem2)); + // We grab the original source-code ranges using `locator.slice()` + // rather than using the expression generator, as this approach allows + // us to easily preserve stylistic choices in the original source code + // such as whether double or single quotes were used. + for (i, (_, key, value)) in element_trios.iter().enumerate() { + result.push_str(locator.slice(key)); + result.push_str(": "); + result.push_str(locator.slice(value)); + if i < last_item_index { + result.push_str(", "); + } + } + + result.push('}'); + result +} From 2b41a65e66fccbac35a92f98a25cf985ee0f2d31 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 19 Jan 2024 19:02:18 +0000 Subject: [PATCH 10/21] Introduce a `SortingStyle` enum --- .../src/rules/ruff/rules/sequence_sorting.rs | 138 +++++++++++++++--- .../src/rules/ruff/rules/sort_dunder_all.rs | 103 +------------ .../src/rules/ruff/rules/sort_dunder_slots.rs | 28 ++-- 3 files changed, 136 insertions(+), 133 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 4d6c62b61b7b5..1f8906dc512ae 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -9,12 +9,114 @@ use std::cmp::Ordering; use ruff_python_ast as ast; use ruff_python_codegen::Stylist; use ruff_python_parser::{lexer, Mode, Tok, TokenKind}; +use ruff_python_stdlib::str::is_cased_uppercase; use ruff_python_trivia::leading_indentation; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use is_macro; use itertools::Itertools; +use natord; + +/// An enumeration of the different sorting styles +/// currently supported for displays of string literals +#[derive(Debug)] +pub(super) enum SortingStyle { + Natural, + Isort, +} + +impl SortingStyle { + pub(super) fn compare(&self, a: &str, b: &str) -> Ordering { + match self { + Self::Natural => natord::compare(a, b), + Self::Isort => IsortSortKey::from(a).cmp(&IsortSortKey::from(b)), + } + } +} + +/// A struct to implement logic necessary to achieve +/// an "isort-style sort". +/// +/// An isort-style sort sorts items first according to their casing: +/// `SCREAMING_SNAKE_CASE` names (conventionally used for global constants) +/// come first, followed by CamelCase names (conventionally used for +/// classes), followed by anything else. Within each category, +/// a [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order) +/// is used to order the elements. +struct IsortSortKey<'a> { + category: InferredMemberType, + value: &'a str, +} + +impl Ord for IsortSortKey<'_> { + fn cmp(&self, other: &Self) -> Ordering { + self.category + .cmp(&other.category) + .then_with(|| natord::compare(self.value, other.value)) + } +} + +impl PartialOrd for IsortSortKey<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for IsortSortKey<'_> { + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for IsortSortKey<'_> {} + +impl<'a> From<&'a str> for IsortSortKey<'a> { + fn from(value: &'a str) -> Self { + Self { + category: InferredMemberType::of(value), + value, + } + } +} + +impl<'a> From<&'a StringSequenceItem> for IsortSortKey<'a> { + fn from(item: &'a StringSequenceItem) -> Self { + Self::from(item.value.as_str()) + } +} + +/// Classification for the casing of an element in a +/// sequence of literal strings. +/// +/// This is necessary to achieve an "isort-style" sort, +/// where elements are sorted first by category, +/// then, within categories, are sorted according +/// to a natural sort. +/// +/// You'll notice that a very similar enum exists +/// in ruff's reimplementation of isort. +#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone, Copy)] +enum InferredMemberType { + Constant, + Class, + Other, +} + +impl InferredMemberType { + fn of(value: &str) -> Self { + // E.g. `CONSTANT` + if value.len() > 1 && is_cased_uppercase(value) { + Self::Constant + // E.g. `Class` + } else if value.starts_with(char::is_uppercase) { + Self::Class + // E.g. `some_variable` or `some_function` + } else { + Self::Other + } + } +} /// An enumeration of the various kinds of sequences for which Python has /// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries). @@ -81,23 +183,20 @@ pub(super) enum DisplayKind<'a> { /// definition of `__all__` or `__slots__` (etc.), /// that can be inserted into the /// source code as a `range_replacement` autofix. -pub(super) fn sort_single_line_elements_sequence( +pub(super) fn sort_single_line_elements_sequence( kind: &SequenceKind, elts: &[ast::Expr], elements: &[&str], locator: &Locator, - mut cmp_fn: F, -) -> String -where - F: FnMut(&str, &str) -> Ordering, -{ + sorting_style: &SortingStyle, +) -> String { assert_eq!(elts.len(), elements.len()); let (opening_paren, closing_paren) = kind.surrounding_parens(locator.contents()); let last_item_index = elements.len().saturating_sub(1); let mut result = String::from(opening_paren); let mut element_pairs = elements.iter().zip(elts).collect_vec(); - element_pairs.sort_by(|(elem1, _), (elem2, _)| cmp_fn(elem1, elem2)); + element_pairs.sort_by(|(elem1, _), (elem2, _)| sorting_style.compare(elem1, elem2)); // We grab the original source-code ranges using `locator.slice()` // rather than using the expression generator, as this approach allows // us to easily preserve stylistic choices in the original source code @@ -131,10 +230,7 @@ pub(super) enum SortClassification<'a> { } impl<'a> SortClassification<'a> { - pub(super) fn of_elements(elements: &'a [ast::Expr], mut cmp_fn: F) -> Self - where - F: FnMut(&str, &str) -> Ordering, - { + pub(super) fn of_elements(elements: &'a [ast::Expr], sorting_style: &SortingStyle) -> Self { let Some((first, rest @ [_, ..])) = elements.split_first() else { return Self::Sorted; }; @@ -148,7 +244,7 @@ impl<'a> SortClassification<'a> { return Self::NotAListOfStringLiterals; }; let next = string_node.value.to_str(); - if cmp_fn(next, this).is_lt() { + if sorting_style.compare(next, this).is_lt() { let mut items = Vec::with_capacity(elements.len()); for expr in elements { let Some(string_node) = expr.as_string_literal_expr() else { @@ -220,15 +316,12 @@ impl MultilineStringSequenceValue { /// has length < 2. It's redundant to call this method in this case, /// since lists with < 2 items cannot be unsorted, /// so this is a logic error. - pub(super) fn into_sorted_source_code( + pub(super) fn into_sorted_source_code( mut self, - cmp_fn: F, + sorting_style: &SortingStyle, locator: &Locator, stylist: &Stylist, - ) -> String - where - F: FnMut(&StringSequenceItem, &StringSequenceItem) -> Ordering, - { + ) -> String { let (first_item_start, last_item_end) = match self.items.as_slice() { [first_item, .., last_item] => (first_item.start(), last_item.end()), _ => panic!( @@ -289,7 +382,8 @@ impl MultilineStringSequenceValue { locator, ); - self.items.sort_by(cmp_fn); + self.items + .sort_by(|a, b| sorting_style.compare(&a.value, &b.value)); let joined_items = join_multiline_string_sequence_items( &self.items, locator, @@ -594,7 +688,7 @@ fn collect_string_sequence_items( /// of `# comment1` does not form a contiguous range with the /// source-code range of `"a"`. #[derive(Debug)] -pub(super) struct StringSequenceItem { +struct StringSequenceItem { value: String, preceding_comment_ranges: Vec, element_range: TextRange, @@ -607,10 +701,6 @@ pub(super) struct StringSequenceItem { } impl StringSequenceItem { - pub(super) fn value(&self) -> &str { - &self.value - } - fn new( value: String, preceding_comment_ranges: Vec, diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 3ccff475e7b74..3df17a5933d99 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -1,24 +1,19 @@ -use std::cmp::Ordering; - use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; -use ruff_python_stdlib::str::is_cased_uppercase; use ruff_text_size::TextRange; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind, - SortClassification, StringSequenceItem, + SortClassification, SortingStyle, }; -use natord; - /// ## What it does /// Checks for `__all__` definitions that are not ordered /// according to an "isort-style" sort. /// -/// An isort-style sort sorts items first according to their casing: +/// An isort-style sort orders items first according to their casing: /// SCREAMING_SNAKE_CASE names (conventionally used for global constants) /// come first, followed by CamelCase names (conventionally used for /// classes), followed by anything else. Within each category, @@ -82,6 +77,8 @@ impl Violation for UnsortedDunderAll { } } +const SORTING_STYLE: &SortingStyle = &SortingStyle::Isort; + /// Sort an `__all__` definition represented by a `StmtAssign` AST node. /// For example: `__all__ = ["b", "c", "a"]`. pub(crate) fn sort_dunder_all_assign( @@ -161,9 +158,7 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) _ => return, }; - let elts_analysis = SortClassification::of_elements(elts, |a, b| { - AllItemSortKey::from(a).cmp(&AllItemSortKey::from(b)) - }); + let elts_analysis = SortClassification::of_elements(elts, SORTING_STYLE); if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { return; } @@ -179,84 +174,6 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) checker.diagnostics.push(diagnostic); } -/// A struct to implement logic necessary to achieve -/// an "isort-style sort". -/// -/// See the docs for this module as a whole for the -/// definition we use here of an "isort-style sort". -struct AllItemSortKey<'a> { - category: InferredMemberType, - value: &'a str, -} - -impl Ord for AllItemSortKey<'_> { - fn cmp(&self, other: &Self) -> Ordering { - self.category - .cmp(&other.category) - .then_with(|| natord::compare(self.value, other.value)) - } -} - -impl PartialOrd for AllItemSortKey<'_> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialEq for AllItemSortKey<'_> { - fn eq(&self, other: &Self) -> bool { - self.cmp(other) == Ordering::Equal - } -} - -impl Eq for AllItemSortKey<'_> {} - -impl<'a> From<&'a str> for AllItemSortKey<'a> { - fn from(value: &'a str) -> Self { - Self { - category: InferredMemberType::of(value), - value, - } - } -} - -impl<'a> From<&'a StringSequenceItem> for AllItemSortKey<'a> { - fn from(item: &'a StringSequenceItem) -> Self { - Self::from(item.value()) - } -} - -/// Classification for an element in `__all__`. -/// -/// This is necessary to achieve an "isort-style" sort, -/// where elements are sorted first by category, -/// then, within categories, are sorted according -/// to a natural sort. -/// -/// You'll notice that a very similar enum exists -/// in ruff's reimplementation of isort. -#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone, Copy)] -enum InferredMemberType { - Constant, - Class, - Other, -} - -impl InferredMemberType { - fn of(value: &str) -> Self { - // E.g. `CONSTANT` - if value.len() > 1 && is_cased_uppercase(value) { - Self::Constant - // E.g. `Class` - } else if value.starts_with(char::is_uppercase) { - Self::Class - // E.g. `some_variable` or `some_function` - } else { - Self::Other - } - } -} - /// Attempt to return `Some(fix)`, where `fix` is a `Fix` /// that can be set on the diagnostic to sort the user's /// `__all__` definition @@ -293,15 +210,9 @@ fn create_fix( if is_multiline { let value = MultilineStringSequenceValue::from_source_range(range, kind, locator)?; assert_eq!(value.len(), elts.len()); - value.into_sorted_source_code( - |this, next| AllItemSortKey::from(this).cmp(&AllItemSortKey::from(next)), - locator, - checker.stylist(), - ) + value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) } else { - sort_single_line_elements_sequence(kind, elts, string_items, locator, |a, b| { - AllItemSortKey::from(a).cmp(&AllItemSortKey::from(b)) - }) + sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE) } }; diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 21bf48a3edf45..c7e98d2d153cf 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -11,11 +11,10 @@ use ruff_text_size::TextRange; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ sort_single_line_elements_sequence, DisplayKind, MultilineStringSequenceValue, SequenceKind, - SortClassification, + SortClassification, SortingStyle, }; use itertools::{izip, Itertools}; -use natord; /// ## What it does /// Checks for `__slots__` and `__match_args__` @@ -106,6 +105,8 @@ pub(crate) fn sort_dunder_slots_ann_assign(checker: &mut Checker, node: &ast::St } } +const SORTING_STYLE: &SortingStyle = &SortingStyle::Natural; + /// Sort a tuple, list, dict or set that defines `__slots__` /// or `__match_args__` in a class scope. /// @@ -135,7 +136,7 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr return; }; - let sort_classification = SortClassification::of_elements(&elts_analysis.elts, natord::compare); + let sort_classification = SortClassification::of_elements(&elts_analysis.elts, SORTING_STYLE); if sort_classification.is_not_a_list_of_string_literals() || sort_classification.is_sorted() { return; } @@ -247,11 +248,7 @@ fn create_fix( let analyzed_sequence = MultilineStringSequenceValue::from_source_range(*range, display_kind, locator)?; assert_eq!(analyzed_sequence.len(), elts.len()); - analyzed_sequence.into_sorted_source_code( - |this, next| natord::compare(this.value(), next.value()), - locator, - checker.stylist(), - ) + analyzed_sequence.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) } else { match display_kind { DisplayKind::Dict { values } => { @@ -262,7 +259,7 @@ fn create_fix( elts, items, locator, - natord::compare, + SORTING_STYLE, ), } } @@ -274,9 +271,14 @@ fn create_fix( } /// Create a string representing a fixed-up single-line -/// definition of `__all__` or `__slots__` (etc.), -/// that can be inserted into the -/// source code as a `range_replacement` autofix. +/// definition of a `__slots__` dictionary that can be +/// inserted into the source code as a `range_replacement` +/// autofix. +/// +/// N.B. This function could potentially be moved into +/// `sequence_sorting.rs` if any other modules need it, +/// but stays here for now, since this is currently the +/// only module that needs it pub(super) fn sort_single_line_elements_dict( key_elts: &[ast::Expr], elements: &[&str], @@ -288,7 +290,7 @@ pub(super) fn sort_single_line_elements_dict( let mut result = String::from('{'); let mut element_trios = izip!(elements, key_elts, value_elts).collect_vec(); - element_trios.sort_by(|(elem1, _, _), (elem2, _, _)| natord::compare(elem1, elem2)); + element_trios.sort_by(|(elem1, _, _), (elem2, _, _)| SORTING_STYLE.compare(elem1, elem2)); // We grab the original source-code ranges using `locator.slice()` // rather than using the expression generator, as this approach allows // us to easily preserve stylistic choices in the original source code From 902871a55f0108f4c48f59c7c0a67597aab13fdf Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 19 Jan 2024 19:06:25 +0000 Subject: [PATCH 11/21] explicitly test an edge case --- crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py index 877de85ac5bb9..25ebf2cf73696 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py @@ -180,6 +180,8 @@ class Klass6: else: __slots__ += ["alpha", "omega"] + __slots__ = {"not": "sorted", "but": "includes", **a_kwarg_splat} + __slots__ = ("b", "a", "e", "d") __slots__ = ["b", "a", "e", "d"] __match_args__ = ["foo", "bar", "antipasti"] From b667d2f8ddeed3dee5a24057ffa00ecd1864297a Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 19 Jan 2024 19:37:28 +0000 Subject: [PATCH 12/21] split the assertion in two --- crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index c7e98d2d153cf..da546677bcc52 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -285,7 +285,8 @@ pub(super) fn sort_single_line_elements_dict( value_elts: &[ast::Expr], locator: &Locator, ) -> String { - assert!(key_elts.len() == elements.len() && elements.len() == value_elts.len()); + assert_eq!(key_elts.len(), elements.len()); + assert_eq!(elements.len(), value_elts.len()); let last_item_index = elements.len().saturating_sub(1); let mut result = String::from('{'); From cf6ca7564eb268ab3098c3a11d3787c36501e40f Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 19 Jan 2024 19:47:08 +0000 Subject: [PATCH 13/21] Rename `NodeAnalysis` --- .../src/rules/ruff/rules/sort_dunder_slots.rs | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index da546677bcc52..3be2c7d013e3d 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -132,11 +132,11 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr return; }; - let Some(elts_analysis) = NodeAnalysis::new(node, dunder_kind) else { + let Some(display) = StringLiteralDisplay::new(node, dunder_kind) else { return; }; - let sort_classification = SortClassification::of_elements(&elts_analysis.elts, SORTING_STYLE); + let sort_classification = SortClassification::of_elements(&display.elts, SORTING_STYLE); if sort_classification.is_not_a_list_of_string_literals() || sort_classification.is_sorted() { return; } @@ -146,11 +146,11 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr class_name: class_name.to_string(), class_variable: dunder_kind, }, - elts_analysis.range, + display.range, ); if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification { - if let Some(fix) = create_fix(&elts_analysis, &items, checker) { + if let Some(fix) = create_fix(&display, &items, checker) { diagnostic.set_fix(fix); } } @@ -158,14 +158,21 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr checker.diagnostics.push(diagnostic); } +/// Struct representing a [display](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries) +/// of string literals. #[derive(Debug)] -struct NodeAnalysis<'a> { +struct StringLiteralDisplay<'a> { + /// The elts from the original AST node representing the display. + /// Each elt is the AST representation of a single string literal + /// element in the display elts: Cow<'a, Vec>, + /// The source-code range of the display as a whole range: TextRange, + /// What kind of a display is it? A dict, set, list or tuple? display_kind: DisplayKind<'a>, } -impl<'a> NodeAnalysis<'a> { +impl<'a> StringLiteralDisplay<'a> { fn new(node: &'a ast::Expr, dunder_kind: SpecialClassDunder) -> Option { let result = match (dunder_kind, node) { (_, ast::Expr::List(ast::ExprList { elts, range, .. })) => { @@ -231,11 +238,11 @@ impl<'a> NodeAnalysis<'a> { } fn create_fix( - NodeAnalysis { + StringLiteralDisplay { elts, range, display_kind, - }: &NodeAnalysis, + }: &StringLiteralDisplay, items: &[&str], checker: &Checker, ) -> Option { From b04bdb0b0b23360fb1985738d5eee5ffb2a702dc Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Sun, 21 Jan 2024 18:37:04 +0000 Subject: [PATCH 14/21] fix a few nits --- .../src/rules/ruff/rules/sequence_sorting.rs | 12 ++++++++++-- .../src/rules/ruff/rules/sort_dunder_all.rs | 2 +- .../src/rules/ruff/rules/sort_dunder_slots.rs | 8 ++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 1f8906dc512ae..419af2c4f2408 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -192,7 +192,11 @@ pub(super) fn sort_single_line_elements_sequence( ) -> String { assert_eq!(elts.len(), elements.len()); let (opening_paren, closing_paren) = kind.surrounding_parens(locator.contents()); - let last_item_index = elements.len().saturating_sub(1); + assert!( + elements.len() >= 2, + "A sequence with < 2 elements cannot be unsorted" + ); + let last_item_index = elements.len() - 1; let mut result = String::from(opening_paren); let mut element_pairs = elements.iter().zip(elts).collect_vec(); @@ -273,7 +277,7 @@ pub(super) struct MultilineStringSequenceValue { } impl MultilineStringSequenceValue { - pub(super) fn len(&self) -> usize { + pub(super) fn num_items(&self) -> usize { self.items.len() } @@ -778,6 +782,10 @@ fn join_multiline_string_sequence_items( newline: &str, needs_trailing_comma: bool, ) -> String { + assert!( + sorted_items.len() >= 2, + "A sequence with < 2 items cannot be unsorted" + ); let last_item_index = sorted_items.len() - 1; let mut new_dunder_all = String::new(); diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 3df17a5933d99..5c02f3e30aab1 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -209,7 +209,7 @@ fn create_fix( // definitions: if is_multiline { let value = MultilineStringSequenceValue::from_source_range(range, kind, locator)?; - assert_eq!(value.len(), elts.len()); + assert_eq!(value.num_items(), elts.len()); value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) } else { sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 3be2c7d013e3d..49ff4a264ba4a 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -254,7 +254,7 @@ fn create_fix( let display_kind = display_kind.as_sequence()?; let analyzed_sequence = MultilineStringSequenceValue::from_source_range(*range, display_kind, locator)?; - assert_eq!(analyzed_sequence.len(), elts.len()); + assert_eq!(analyzed_sequence.num_items(), elts.len()); analyzed_sequence.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) } else { match display_kind { @@ -294,7 +294,11 @@ pub(super) fn sort_single_line_elements_dict( ) -> String { assert_eq!(key_elts.len(), elements.len()); assert_eq!(elements.len(), value_elts.len()); - let last_item_index = elements.len().saturating_sub(1); + assert!( + elements.len() >= 2, + "A sequence with < 2 elements cannot be unsorted" + ); + let last_item_index = elements.len() - 1; let mut result = String::from('{'); let mut element_trios = izip!(elements, key_elts, value_elts).collect_vec(); From 3dfdf8b896e2cb1add28f9ff8dfc02b5d60b6721 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Sun, 21 Jan 2024 22:23:22 +0000 Subject: [PATCH 15/21] Address some Charlie comments --- .../src/rules/ruff/rules/sequence_sorting.rs | 35 +++++++++++++++---- .../src/rules/ruff/rules/sort_dunder_all.rs | 2 +- .../src/rules/ruff/rules/sort_dunder_slots.rs | 2 +- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 419af2c4f2408..04b606f4ecba5 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -139,7 +139,7 @@ pub(super) enum SequenceKind<'a> { } impl SequenceKind<'_> { - fn surrounding_parens(&self, source: &str) -> (&'static str, &'static str) { + fn surrounding_brackets(&self, source: &str) -> (&'static str, &'static str) { match self { Self::List => ("[", "]"), Self::Set => ("{", "}"), @@ -191,7 +191,7 @@ pub(super) fn sort_single_line_elements_sequence( sorting_style: &SortingStyle, ) -> String { assert_eq!(elts.len(), elements.len()); - let (opening_paren, closing_paren) = kind.surrounding_parens(locator.contents()); + let (opening_paren, closing_paren) = kind.surrounding_brackets(locator.contents()); assert!( elements.len() >= 2, "A sequence with < 2 elements cannot be unsorted" @@ -226,7 +226,9 @@ pub(super) enum SortClassification<'a> { /// but we wouldn't be able to autofix it UnsortedButUnfixable, /// It's an unsorted display of string literals, - /// and it's possible we could generate a fix for it + /// and it's possible we could generate a fix for it; + /// here's the values of the elts so we can use them to + /// generate an autofix: UnsortedAndMaybeFixable { items: Vec<&'a str> }, /// The display contains one or more items that are not string /// literals. @@ -235,20 +237,37 @@ pub(super) enum SortClassification<'a> { impl<'a> SortClassification<'a> { pub(super) fn of_elements(elements: &'a [ast::Expr], sorting_style: &SortingStyle) -> Self { + // If it's of length less than 2, it has to be sorted already let Some((first, rest @ [_, ..])) = elements.split_first() else { return Self::Sorted; }; + + // If any elt we encounter is not an ExprStringLiteral AST node, + // that indicates at least one item in the sequence is not a string literal, + // which means the sequence is out of scope for RUF022/RUF023/etc. let Some(string_node) = first.as_string_literal_expr() else { return Self::NotAListOfStringLiterals; }; - let mut this = string_node.value.to_str(); + let mut current = string_node.value.to_str(); for expr in rest { let Some(string_node) = expr.as_string_literal_expr() else { return Self::NotAListOfStringLiterals; }; let next = string_node.value.to_str(); - if sorting_style.compare(next, this).is_lt() { + if sorting_style.compare(next, current).is_lt() { + // Looks like the sequence was not in fact already sorted! + // + // Now we need to gather the necessary information we'd need + // to create an autofix. We need to know three things for this: + // + // 1. Are all items in the sequence string literals? + // (If not, we won't even be emitting the violation, let alone + // trying to fix it.) + // 2. Are any items in the sequence implicitly concatenated? + // (If so, we might be *emitting* the violation, but we definitely + // won't be trying to fix it.) + // 3. What is the value of each elt in the sequence? let mut items = Vec::with_capacity(elements.len()); for expr in elements { let Some(string_node) = expr.as_string_literal_expr() else { @@ -261,8 +280,10 @@ impl<'a> SortClassification<'a> { } return Self::UnsortedAndMaybeFixable { items }; } - this = next; + current = next; } + // Looks like the sequence was already sorted -- hooray! + // We won't be emitting a violation this time. Self::Sorted } } @@ -277,7 +298,7 @@ pub(super) struct MultilineStringSequenceValue { } impl MultilineStringSequenceValue { - pub(super) fn num_items(&self) -> usize { + pub(super) fn len(&self) -> usize { self.items.len() } diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 5c02f3e30aab1..3df17a5933d99 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -209,7 +209,7 @@ fn create_fix( // definitions: if is_multiline { let value = MultilineStringSequenceValue::from_source_range(range, kind, locator)?; - assert_eq!(value.num_items(), elts.len()); + assert_eq!(value.len(), elts.len()); value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) } else { sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 49ff4a264ba4a..85da6a34e1237 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -254,7 +254,7 @@ fn create_fix( let display_kind = display_kind.as_sequence()?; let analyzed_sequence = MultilineStringSequenceValue::from_source_range(*range, display_kind, locator)?; - assert_eq!(analyzed_sequence.num_items(), elts.len()); + assert_eq!(analyzed_sequence.len(), elts.len()); analyzed_sequence.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) } else { match display_kind { From 606d85a74cfb8be467be0882fe94081c08463542 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Sun, 21 Jan 2024 22:29:53 +0000 Subject: [PATCH 16/21] Fix edge-case bug involving `__slots__` or `__all__` that have non-string literals after a string literal that uses implicit concatenation --- crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py | 2 ++ crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py | 2 ++ .../ruff_linter/src/rules/ruff/rules/sequence_sorting.rs | 8 +++++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py index 5a749d3ed4dd1..7b93efba2d08b 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py @@ -306,3 +306,5 @@ class IntroducesNonModuleScope: __all__ = [ "foo", (), "bar" ] + +__all__ = "foo", "an" "implicitly_concatenated_second_item", not_a_string_literal diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py index 25ebf2cf73696..655933c95deb2 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py @@ -203,6 +203,8 @@ class Klass6: "duplicate_element", # comment0 ) + __slots__ = "foo", "an" "implicitly_concatenated_second_item", not_a_string_literal + __slots__ =[ [] ] diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 04b606f4ecba5..87e1171dbeae4 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -269,15 +269,17 @@ impl<'a> SortClassification<'a> { // won't be trying to fix it.) // 3. What is the value of each elt in the sequence? let mut items = Vec::with_capacity(elements.len()); + let mut any_implicit_concatenation = false; for expr in elements { let Some(string_node) = expr.as_string_literal_expr() else { return Self::NotAListOfStringLiterals; }; - if string_node.value.is_implicit_concatenated() { - return Self::UnsortedButUnfixable; - } + any_implicit_concatenation |= string_node.value.is_implicit_concatenated(); items.push(string_node.value.to_str()); } + if any_implicit_concatenation { + return Self::UnsortedButUnfixable; + } return Self::UnsortedAndMaybeFixable { items }; } current = next; From 1f59a1691c46b1d7bec27c31a319780df6bb8b01 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Sun, 21 Jan 2024 22:35:17 +0000 Subject: [PATCH 17/21] More Charlie comments --- .../src/rules/ruff/rules/sequence_sorting.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 87e1171dbeae4..df7043eb163e5 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -22,7 +22,17 @@ use natord; /// currently supported for displays of string literals #[derive(Debug)] pub(super) enum SortingStyle { + /// Sort string-literal items according to a + /// [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order). Natural, + /// Sort string-literal items "isort-style". + /// + /// An isort-style sort orders items first according to their casing: + /// SCREAMING_SNAKE_CASE names (conventionally used for global constants) + /// come first, followed by CamelCase names (conventionally used for + /// classes), followed by anything else. Within each category, + /// a [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order) + /// is used to order the elements. Isort, } @@ -176,7 +186,7 @@ impl SequenceKind<'_> { #[derive(Debug, is_macro::Is)] pub(super) enum DisplayKind<'a> { Sequence(SequenceKind<'a>), - Dict { values: &'a Vec }, + Dict { values: &'a [ast::Expr] }, } /// Create a string representing a fixed-up single-line From e64879a91ea16b09cf04ede8c6645cea3ba47757 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Sun, 21 Jan 2024 22:53:30 +0000 Subject: [PATCH 18/21] Make `create_fix` a method in `sort_dunder_slots.rs` --- .../src/rules/ruff/rules/sort_dunder_slots.rs | 79 ++++++++++--------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 85da6a34e1237..060a427378ec2 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -6,7 +6,7 @@ use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; use ruff_python_semantic::ScopeKind; use ruff_source_file::Locator; -use ruff_text_size::TextRange; +use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ @@ -150,7 +150,7 @@ fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr ); if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification { - if let Some(fix) = create_fix(&display, &items, checker) { + if let Some(fix) = display.generate_fix(&items, checker) { diagnostic.set_fix(fix); } } @@ -172,6 +172,12 @@ struct StringLiteralDisplay<'a> { display_kind: DisplayKind<'a>, } +impl Ranged for StringLiteralDisplay<'_> { + fn range(&self) -> TextRange { + self.range + } +} + impl<'a> StringLiteralDisplay<'a> { fn new(node: &'a ast::Expr, dunder_kind: SpecialClassDunder) -> Option { let result = match (dunder_kind, node) { @@ -235,46 +241,41 @@ impl<'a> StringLiteralDisplay<'a> { }; Some(result) } -} -fn create_fix( - StringLiteralDisplay { - elts, - range, - display_kind, - }: &StringLiteralDisplay, - items: &[&str], - checker: &Checker, -) -> Option { - let locator = checker.locator(); - let is_multiline = locator.contains_line_break(*range); - let sorted_source_code = { - if is_multiline { - // Sorting multiline dicts is unsupported - let display_kind = display_kind.as_sequence()?; - let analyzed_sequence = - MultilineStringSequenceValue::from_source_range(*range, display_kind, locator)?; - assert_eq!(analyzed_sequence.len(), elts.len()); - analyzed_sequence.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) - } else { - match display_kind { - DisplayKind::Dict { values } => { - sort_single_line_elements_dict(elts, items, values, locator) - } - DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( - sequence_kind, - elts, - items, + fn generate_fix(&self, items: &[&str], checker: &Checker) -> Option { + let locator = checker.locator(); + let is_multiline = locator.contains_line_break(self.range()); + let sorted_source_code = { + if is_multiline { + // Sorting multiline dicts is unsupported + let display_kind = self.display_kind.as_sequence()?; + let analyzed_sequence = MultilineStringSequenceValue::from_source_range( + self.range(), + display_kind, locator, - SORTING_STYLE, - ), + )?; + assert_eq!(analyzed_sequence.len(), self.elts.len()); + analyzed_sequence.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) + } else { + match &self.display_kind { + DisplayKind::Dict { values } => { + sort_single_line_elements_dict(&self.elts, items, values, locator) + } + DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( + sequence_kind, + &self.elts, + items, + locator, + SORTING_STYLE, + ), + } } - } - }; - Some(Fix::safe_edit(Edit::range_replacement( - sorted_source_code, - *range, - ))) + }; + Some(Fix::safe_edit(Edit::range_replacement( + sorted_source_code, + self.range, + ))) + } } /// Create a string representing a fixed-up single-line From 6645934e0f2d3e53071edc1b30a4d17e835ed76e Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Mon, 22 Jan 2024 00:00:50 +0000 Subject: [PATCH 19/21] Put all the validation logic for the elements into a newtype constructor method --- .../src/rules/ruff/rules/sequence_sorting.rs | 47 +++++++++++++------ .../src/rules/ruff/rules/sort_dunder_slots.rs | 42 ++++++++++++----- 2 files changed, 63 insertions(+), 26 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index df7043eb163e5..ab21d456ff6cb 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -15,7 +15,6 @@ use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use is_macro; -use itertools::Itertools; use natord; /// An enumeration of the different sorting styles @@ -189,6 +188,34 @@ pub(super) enum DisplayKind<'a> { Dict { values: &'a [ast::Expr] }, } +struct SequenceElements<'a>(Vec<(&'a &'a str, &'a ast::Expr)>); + +impl<'a> SequenceElements<'a> { + fn new(elements: &'a [&str], elts: &'a [ast::Expr]) -> Self { + assert_eq!(elements.len(), elts.len()); + assert!( + elements.len() >= 2, + "A sequence with < 2 elements cannot be unsorted" + ); + Self(elements.iter().zip(elts).collect()) + } + + fn last_item_index(&self) -> usize { + // Safe from underflow, as the constructor guarantees + // that the underlying vector has length >= 2 + self.0.len() - 1 + } + + fn into_sorted_elts( + mut self, + sorting_style: &SortingStyle, + ) -> impl Iterator { + self.0 + .sort_by(|(elem1, _), (elem2, _)| sorting_style.compare(elem1, elem2)); + self.0.into_iter().map(|(_, elt)| elt) + } +} + /// Create a string representing a fixed-up single-line /// definition of `__all__` or `__slots__` (etc.), /// that can be inserted into the @@ -200,28 +227,20 @@ pub(super) fn sort_single_line_elements_sequence( locator: &Locator, sorting_style: &SortingStyle, ) -> String { - assert_eq!(elts.len(), elements.len()); + let element_pairs = SequenceElements::new(elements, elts); + let last_item_index = element_pairs.last_item_index(); let (opening_paren, closing_paren) = kind.surrounding_brackets(locator.contents()); - assert!( - elements.len() >= 2, - "A sequence with < 2 elements cannot be unsorted" - ); - let last_item_index = elements.len() - 1; let mut result = String::from(opening_paren); - - let mut element_pairs = elements.iter().zip(elts).collect_vec(); - element_pairs.sort_by(|(elem1, _), (elem2, _)| sorting_style.compare(elem1, elem2)); // We grab the original source-code ranges using `locator.slice()` // rather than using the expression generator, as this approach allows // us to easily preserve stylistic choices in the original source code // such as whether double or single quotes were used. - for (i, (_, elt)) in element_pairs.iter().enumerate() { + for (i, elt) in element_pairs.into_sorted_elts(sorting_style).enumerate() { result.push_str(locator.slice(elt)); if i < last_item_index { result.push_str(", "); } } - result.push_str(closing_paren); result } @@ -595,8 +614,8 @@ impl LineState { #[derive(Debug)] struct LineWithJustAComment(TextRange); -/// Instances of this struct represent source-code lines in single-line -/// or multiline tuples/lists/sets where the line contains at least +/// Instances of this struct represent source-code lines in +/// multiline tuples/lists/sets where the line contains at least /// 1 element of the sequence. The line may contain > 1 element of the /// sequence, and may also have a trailing comment after the element(s). #[derive(Debug)] diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 060a427378ec2..da405b71b6296 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -14,7 +14,7 @@ use crate::rules::ruff::rules::sequence_sorting::{ SortClassification, SortingStyle, }; -use itertools::{izip, Itertools}; +use itertools::izip; /// ## What it does /// Checks for `__slots__` and `__match_args__` @@ -278,6 +278,32 @@ impl<'a> StringLiteralDisplay<'a> { } } +struct DictElements<'a>(Vec<(&'a &'a str, &'a ast::Expr, &'a ast::Expr)>); + +impl<'a> DictElements<'a> { + fn new(elements: &'a [&str], key_elts: &'a [ast::Expr], value_elts: &'a [ast::Expr]) -> Self { + assert_eq!(key_elts.len(), elements.len()); + assert_eq!(elements.len(), value_elts.len()); + assert!( + elements.len() >= 2, + "A sequence with < 2 elements cannot be unsorted" + ); + Self(izip!(elements, key_elts, value_elts).collect()) + } + + fn last_item_index(&self) -> usize { + // Safe from underflow, as the constructor guarantees + // that the underlying vector has length >= 2 + self.0.len() - 1 + } + + fn into_sorted_elts(mut self) -> impl Iterator { + self.0 + .sort_by(|(elem1, _, _), (elem2, _, _)| SORTING_STYLE.compare(elem1, elem2)); + self.0.into_iter().map(|(_, key, value)| (key, value)) + } +} + /// Create a string representing a fixed-up single-line /// definition of a `__slots__` dictionary that can be /// inserted into the source code as a `range_replacement` @@ -293,22 +319,14 @@ pub(super) fn sort_single_line_elements_dict( value_elts: &[ast::Expr], locator: &Locator, ) -> String { - assert_eq!(key_elts.len(), elements.len()); - assert_eq!(elements.len(), value_elts.len()); - assert!( - elements.len() >= 2, - "A sequence with < 2 elements cannot be unsorted" - ); - let last_item_index = elements.len() - 1; + let element_trios = DictElements::new(elements, key_elts, value_elts); + let last_item_index = element_trios.last_item_index(); let mut result = String::from('{'); - - let mut element_trios = izip!(elements, key_elts, value_elts).collect_vec(); - element_trios.sort_by(|(elem1, _, _), (elem2, _, _)| SORTING_STYLE.compare(elem1, elem2)); // We grab the original source-code ranges using `locator.slice()` // rather than using the expression generator, as this approach allows // us to easily preserve stylistic choices in the original source code // such as whether double or single quotes were used. - for (i, (_, key, value)) in element_trios.iter().enumerate() { + for (i, (key, value)) in element_trios.into_sorted_elts().enumerate() { result.push_str(locator.slice(key)); result.push_str(": "); result.push_str(locator.slice(value)); From 0ab0682bdbb6587e89047220f6531b88a802dba3 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Mon, 22 Jan 2024 08:01:36 +0000 Subject: [PATCH 20/21] Implement `Clone` and `Copy` for `SortingStyle` --- .../src/rules/ruff/rules/sequence_sorting.rs | 12 ++++++------ .../src/rules/ruff/rules/sort_dunder_all.rs | 2 +- .../src/rules/ruff/rules/sort_dunder_slots.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index ab21d456ff6cb..236ed5797c945 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -19,7 +19,7 @@ use natord; /// An enumeration of the different sorting styles /// currently supported for displays of string literals -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub(super) enum SortingStyle { /// Sort string-literal items according to a /// [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order). @@ -36,7 +36,7 @@ pub(super) enum SortingStyle { } impl SortingStyle { - pub(super) fn compare(&self, a: &str, b: &str) -> Ordering { + pub(super) fn compare(self, a: &str, b: &str) -> Ordering { match self { Self::Natural => natord::compare(a, b), Self::Isort => IsortSortKey::from(a).cmp(&IsortSortKey::from(b)), @@ -208,7 +208,7 @@ impl<'a> SequenceElements<'a> { fn into_sorted_elts( mut self, - sorting_style: &SortingStyle, + sorting_style: SortingStyle, ) -> impl Iterator { self.0 .sort_by(|(elem1, _), (elem2, _)| sorting_style.compare(elem1, elem2)); @@ -225,7 +225,7 @@ pub(super) fn sort_single_line_elements_sequence( elts: &[ast::Expr], elements: &[&str], locator: &Locator, - sorting_style: &SortingStyle, + sorting_style: SortingStyle, ) -> String { let element_pairs = SequenceElements::new(elements, elts); let last_item_index = element_pairs.last_item_index(); @@ -265,7 +265,7 @@ pub(super) enum SortClassification<'a> { } impl<'a> SortClassification<'a> { - pub(super) fn of_elements(elements: &'a [ast::Expr], sorting_style: &SortingStyle) -> Self { + pub(super) fn of_elements(elements: &'a [ast::Expr], sorting_style: SortingStyle) -> Self { // If it's of length less than 2, it has to be sorted already let Some((first, rest @ [_, ..])) = elements.split_first() else { return Self::Sorted; @@ -374,7 +374,7 @@ impl MultilineStringSequenceValue { /// so this is a logic error. pub(super) fn into_sorted_source_code( mut self, - sorting_style: &SortingStyle, + sorting_style: SortingStyle, locator: &Locator, stylist: &Stylist, ) -> String { diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 3df17a5933d99..341bff19c5bdf 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -77,7 +77,7 @@ impl Violation for UnsortedDunderAll { } } -const SORTING_STYLE: &SortingStyle = &SortingStyle::Isort; +const SORTING_STYLE: SortingStyle = SortingStyle::Isort; /// Sort an `__all__` definition represented by a `StmtAssign` AST node. /// For example: `__all__ = ["b", "c", "a"]`. diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index da405b71b6296..15caad43eb8ee 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -105,7 +105,7 @@ pub(crate) fn sort_dunder_slots_ann_assign(checker: &mut Checker, node: &ast::St } } -const SORTING_STYLE: &SortingStyle = &SortingStyle::Natural; +const SORTING_STYLE: SortingStyle = SortingStyle::Natural; /// Sort a tuple, list, dict or set that defines `__slots__` /// or `__match_args__` in a class scope. From 333fe620bdd54ffc24183c7fc795f47cd7ff6924 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Mon, 22 Jan 2024 12:14:51 +0000 Subject: [PATCH 21/21] Small cleanups --- .../src/rules/ruff/rules/sequence_sorting.rs | 30 +++++----- .../src/rules/ruff/rules/sort_dunder_slots.rs | 58 ++++++++++++------- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 236ed5797c945..9648fed7eac13 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -89,12 +89,6 @@ impl<'a> From<&'a str> for IsortSortKey<'a> { } } -impl<'a> From<&'a StringSequenceItem> for IsortSortKey<'a> { - fn from(item: &'a StringSequenceItem) -> Self { - Self::from(item.value.as_str()) - } -} - /// Classification for the casing of an element in a /// sequence of literal strings. /// @@ -135,9 +129,9 @@ impl InferredMemberType { /// since in terms of the AST structure it's almost identical /// to tuples/lists.) /// -/// Whereas lists, dicts and sets are always parenthesized +/// Whereas list, dict and set literals are always parenthesized /// (e.g. lists always start with `[` and end with `]`), -/// single-line tuples *can* be unparenthesized. +/// single-line tuple literals *can* be unparenthesized. /// We keep the original AST node around for the /// Tuple variant so that this can be queried later. #[derive(Debug)] @@ -148,6 +142,9 @@ pub(super) enum SequenceKind<'a> { } impl SequenceKind<'_> { + // N.B. We only need the source code for the Tuple variant here, + // but if you already have a `Locator` instance handy, + // getting the source code is very cheap. fn surrounding_brackets(&self, source: &str) -> (&'static str, &'static str) { match self { Self::List => ("[", "]"), @@ -179,15 +176,14 @@ impl SequenceKind<'_> { } } -/// An enumeration of the various kinds of -/// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries) -/// Python provides for builtin containers. -#[derive(Debug, is_macro::Is)] -pub(super) enum DisplayKind<'a> { - Sequence(SequenceKind<'a>), - Dict { values: &'a [ast::Expr] }, -} - +/// A newtype that zips together the string values of a display literal's elts, +/// together with the original AST nodes for that display literal's elts. +/// +/// The main purpose of separating this out into a separate struct +/// is to enforce the invariants that: +/// +/// 1. The two iterables that are zipped together have the same length; and, +/// 2. The length of both iterables is >= 2 struct SequenceElements<'a>(Vec<(&'a &'a str, &'a ast::Expr)>); impl<'a> SequenceElements<'a> { diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 15caad43eb8ee..bac21f6befe88 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -10,7 +10,7 @@ use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ - sort_single_line_elements_sequence, DisplayKind, MultilineStringSequenceValue, SequenceKind, + sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind, SortClassification, SortingStyle, }; @@ -245,30 +245,27 @@ impl<'a> StringLiteralDisplay<'a> { fn generate_fix(&self, items: &[&str], checker: &Checker) -> Option { let locator = checker.locator(); let is_multiline = locator.contains_line_break(self.range()); - let sorted_source_code = { - if is_multiline { - // Sorting multiline dicts is unsupported - let display_kind = self.display_kind.as_sequence()?; + let sorted_source_code = match (&self.display_kind, is_multiline) { + (DisplayKind::Sequence(sequence_kind), true) => { let analyzed_sequence = MultilineStringSequenceValue::from_source_range( self.range(), - display_kind, + sequence_kind, locator, )?; assert_eq!(analyzed_sequence.len(), self.elts.len()); analyzed_sequence.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) - } else { - match &self.display_kind { - DisplayKind::Dict { values } => { - sort_single_line_elements_dict(&self.elts, items, values, locator) - } - DisplayKind::Sequence(sequence_kind) => sort_single_line_elements_sequence( - sequence_kind, - &self.elts, - items, - locator, - SORTING_STYLE, - ), - } + } + // Sorting multiline dicts is unsupported + (DisplayKind::Dict { .. }, true) => return None, + (DisplayKind::Sequence(sequence_kind), false) => sort_single_line_elements_sequence( + sequence_kind, + &self.elts, + items, + locator, + SORTING_STYLE, + ), + (DisplayKind::Dict { values }, false) => { + sort_single_line_elements_dict(&self.elts, items, values, locator) } }; Some(Fix::safe_edit(Edit::range_replacement( @@ -278,6 +275,26 @@ impl<'a> StringLiteralDisplay<'a> { } } +/// An enumeration of the various kinds of +/// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries) +/// Python provides for builtin containers. +#[derive(Debug)] +enum DisplayKind<'a> { + Sequence(SequenceKind<'a>), + Dict { values: &'a [ast::Expr] }, +} + +/// A newtype that zips together three iterables: +/// +/// 1. The string values of a dict literal's keys; +/// 2. The original AST nodes for the dict literal's keys; and, +/// 3. The original AST nodes for the dict literal's values +/// +/// The main purpose of separating this out into a separate struct +/// is to enforce the invariants that: +/// +/// 1. The three iterables that are zipped together have the same length; and, +/// 2. The length of all three iterables is >= 2 struct DictElements<'a>(Vec<(&'a &'a str, &'a ast::Expr, &'a ast::Expr)>); impl<'a> DictElements<'a> { @@ -313,7 +330,7 @@ impl<'a> DictElements<'a> { /// `sequence_sorting.rs` if any other modules need it, /// but stays here for now, since this is currently the /// only module that needs it -pub(super) fn sort_single_line_elements_dict( +fn sort_single_line_elements_dict( key_elts: &[ast::Expr], elements: &[&str], value_elts: &[ast::Expr], @@ -334,7 +351,6 @@ pub(super) fn sort_single_line_elements_dict( result.push_str(", "); } } - result.push('}'); result }