diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py index 5a749d3ed4dd1..7b93efba2d08b 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF022.py @@ -306,3 +306,5 @@ class IntroducesNonModuleScope: __all__ = [ "foo", (), "bar" ] + +__all__ = "foo", "an" "implicitly_concatenated_second_item", not_a_string_literal diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py new file mode 100644 index 0000000000000..655933c95deb2 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF023.py @@ -0,0 +1,234 @@ +######################### +# Single-line definitions +######################### + +class Klass: + __slots__ = ["d", "c", "b", "a"] # a comment that is untouched + __match_args__ = ("d", "c", "b", "a") + + # Quoting style is retained, + # but unnecessary parens are not + __slots__: set = {'b', "c", ((('a')))} + # Trailing commas are also not retained for single-line definitions + # (but they are in multiline definitions) + __match_args__: tuple = ("b", "c", "a",) + +class Klass2: + if bool(): + __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} + else: + __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + + __match_args__: list[str] = ["the", "three", "little", "pigs"] + __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + # we use natural sort, + # not alphabetical sort or "isort-style" sort + __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + +############################ +# Neat multiline definitions +############################ + +class Klass3: + __slots__ = ( + "d0", + "c0", # a comment regarding 'c0' + "b0", + # a comment regarding 'a0': + "a0" + ) + __match_args__ = [ + "d", + "c", # a comment regarding 'c' + "b", + # a comment regarding 'a': + "a" + ] + +########################################## +# Messier multiline __all__ definitions... +########################################## + +class Klass4: + # comment0 + __slots__ = ("d", "a", # comment1 + # comment2 + "f", "b", + "strangely", # comment3 + # comment4 + "formatted", + # comment5 + ) # comment6 + # comment7 + + __match_args__ = [ # comment0 + # comment1 + # comment2 + "dx", "cx", "bx", "ax" # comment3 + # comment4 + # comment5 + # comment6 + ] # comment7 + +# from cpython/Lib/pathlib/__init__.py +class PurePath: + __slots__ = ( + # The `_raw_paths` slot stores unnormalized string paths. This is set + # in the `__init__()` method. + '_raw_paths', + + # The `_drv`, `_root` and `_tail_cached` slots store parsed and + # normalized parts of the path. They are set when any of the `drive`, + # `root` or `_tail` properties are accessed for the first time. The + # three-part division corresponds to the result of + # `os.path.splitroot()`, except that the tail is further split on path + # separators (i.e. it is a list of strings), and that the root and + # tail are normalized. + '_drv', '_root', '_tail_cached', + + # The `_str` slot stores the string representation of the path, + # computed from the drive, root and tail when `__str__()` is called + # for the first time. It's used to implement `_str_normcase` + '_str', + + # The `_str_normcase_cached` slot stores the string path with + # normalized case. It is set when the `_str_normcase` property is + # accessed for the first time. 
It's used to implement `__eq__()` + # `__hash__()`, and `_parts_normcase` + '_str_normcase_cached', + + # The `_parts_normcase_cached` slot stores the case-normalized + # string path after splitting on path separators. It's set when the + # `_parts_normcase` property is accessed for the first time. It's used + # to implement comparison methods like `__lt__()`. + '_parts_normcase_cached', + + # The `_hash` slot stores the hash of the case-normalized string + # path. It's set when `__hash__()` is called for the first time. + '_hash', + ) + +# From cpython/Lib/pickletools.py +class ArgumentDescriptor(object): + __slots__ = ( + # name of descriptor record, also a module global name; a string + 'name', + + # length of argument, in bytes; an int; UP_TO_NEWLINE and + # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length + # cases + 'n', + + # a function taking a file-like object, reading this kind of argument + # from the object at the current position, advancing the current + # position by n bytes, and returning the value of the argument + 'reader', + + # human-readable docs for this arg descriptor; a string + 'doc', + ) + +#################################### +# Should be flagged, but not fixed +#################################### + +# from cpython/Lib/test/test_inspect.py. +# Multiline dicts are out of scope. +class SlotUser: + __slots__ = {'power': 'measured in kilowatts', + 'distance': 'measured in kilometers'} + +class Klass5: + __match_args__ = ( + "look", + ( + "a_veeeeeeeeeeeeeeeeeeery_long_parenthesized_item" + ), + ) + __slots__ = ( + "b", + (( + "c" + )), + "a" + ) + __slots__ = ("don't" "care" "about", "__slots__" "with", "concatenated" "strings") + +################################### +# These should all not get flagged: +################################### + +class Klass6: + __slots__ = () + __match_args__ = [] + __slots__ = ("single_item",) + __match_args__ = ( + "single_item_multiline", + ) + __slots__ = {"single_item",} + __slots__ = {"single_item_no_trailing_comma": "docs for that"} + __match_args__ = [ + "single_item_multiline_no_trailing_comma" + ] + __slots__ = ("not_a_tuple_just_a_string") + __slots__ = ["a", "b", "c", "d"] + __slots__ += ["e", "f", "g"] + __slots__ = ("a", "b", "c", "d") + + if bool(): + __slots__ += ("e", "f", "g") + else: + __slots__ += ["alpha", "omega"] + + __slots__ = {"not": "sorted", "but": "includes", **a_kwarg_splat} + +__slots__ = ("b", "a", "e", "d") +__slots__ = ["b", "a", "e", "d"] +__match_args__ = ["foo", "bar", "antipasti"] + +class Klass6: + __slots__ = (9, 8, 7) + __match_args__ = ( # This is just an empty tuple, + # but, + # it's very well + ) # documented + + # We don't deduplicate elements; + # this just ensures that duplicate elements aren't unnecessarily + # reordered by an autofix: + __slots__ = ( + "duplicate_element", # comment1 + "duplicate_element", # comment3 + "duplicate_element", # comment2 + "duplicate_element", # comment0 + ) + + __slots__ = "foo", "an" "implicitly_concatenated_second_item", not_a_string_literal + + __slots__ =[ + [] + ] + __slots__ = [ + () + ] + __match_args__ = ( + () + ) + __match_args__ = ( + [] + ) + __slots__ = ( + (), + ) + __slots__ = ( + [], + ) + __match_args__ = ( + "foo", [], "bar" + ) + __match_args__ = [ + "foo", (), "bar" + ] + + __match_args__ = {"a", "set", "for", "__match_args__", "is invalid"} + __match_args__ = {"this": "is", "also": "invalid"} diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs 
index b3a994593992f..33ca106673198 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -1450,6 +1450,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { if checker.settings.rules.enabled(Rule::UnsortedDunderAll) { ruff::rules::sort_dunder_all_assign(checker, assign); } + if checker.enabled(Rule::UnsortedDunderSlots) { + ruff::rules::sort_dunder_slots_assign(checker, assign); + } if checker.source_type.is_stub() { if checker.any_enabled(&[ Rule::UnprefixedTypeParam, @@ -1523,6 +1526,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { if checker.settings.rules.enabled(Rule::UnsortedDunderAll) { ruff::rules::sort_dunder_all_ann_assign(checker, assign_stmt); } + if checker.enabled(Rule::UnsortedDunderSlots) { + ruff::rules::sort_dunder_slots_ann_assign(checker, assign_stmt); + } if checker.source_type.is_stub() { if let Some(value) = value { if checker.enabled(Rule::AssignmentDefaultInStub) { diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index f050700a79b17..a049589958739 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -926,6 +926,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Ruff, "020") => (RuleGroup::Preview, rules::ruff::rules::NeverUnion), (Ruff, "021") => (RuleGroup::Preview, rules::ruff::rules::ParenthesizeChainedOperators), (Ruff, "022") => (RuleGroup::Preview, rules::ruff::rules::UnsortedDunderAll), + (Ruff, "023") => (RuleGroup::Preview, rules::ruff::rules::UnsortedDunderSlots), (Ruff, "024") => (RuleGroup::Preview, rules::ruff::rules::MutableFromkeysValue), (Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA), (Ruff, "200") => (RuleGroup::Stable, rules::ruff::rules::InvalidPyprojectToml), diff --git a/crates/ruff_linter/src/rules/ruff/mod.rs b/crates/ruff_linter/src/rules/ruff/mod.rs index 6c69271138aea..26abe0ae8a93c 100644 --- a/crates/ruff_linter/src/rules/ruff/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/mod.rs @@ -43,6 +43,7 @@ mod tests { #[test_case(Rule::NeverUnion, Path::new("RUF020.py"))] #[test_case(Rule::ParenthesizeChainedOperators, Path::new("RUF021.py"))] #[test_case(Rule::UnsortedDunderAll, Path::new("RUF022.py"))] + #[test_case(Rule::UnsortedDunderSlots, Path::new("RUF023.py"))] #[test_case(Rule::MutableFromkeysValue, Path::new("RUF024.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); diff --git a/crates/ruff_linter/src/rules/ruff/rules/mod.rs b/crates/ruff_linter/src/rules/ruff/rules/mod.rs index b73c614e64b5d..58324facbc6ad 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/mod.rs @@ -15,6 +15,7 @@ pub(crate) use pairwise_over_zipped::*; pub(crate) use parenthesize_logical_operators::*; pub(crate) use quadratic_list_summation::*; pub(crate) use sort_dunder_all::*; +pub(crate) use sort_dunder_slots::*; pub(crate) use static_key_dict_comprehension::*; pub(crate) use unnecessary_iterable_allocation_for_first_element::*; pub(crate) use unnecessary_key_check::*; @@ -37,7 +38,9 @@ mod mutable_fromkeys_value; mod never_union; mod pairwise_over_zipped; mod parenthesize_logical_operators; +mod sequence_sorting; mod sort_dunder_all; +mod sort_dunder_slots; mod static_key_dict_comprehension; mod unnecessary_iterable_allocation_for_first_element; mod unnecessary_key_check; diff --git 
a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs
new file mode 100644
index 0000000000000..9648fed7eac13
--- /dev/null
+++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs
@@ -0,0 +1,926 @@
+/// Utilities for sorting constant lists of string literals.
+///
+/// Examples where these are useful:
+/// - Sorting `__all__` in the global scope,
+/// - Sorting `__slots__` or `__match_args__` in a class scope
+use std::borrow::Cow;
+use std::cmp::Ordering;
+
+use ruff_python_ast as ast;
+use ruff_python_codegen::Stylist;
+use ruff_python_parser::{lexer, Mode, Tok, TokenKind};
+use ruff_python_stdlib::str::is_cased_uppercase;
+use ruff_python_trivia::leading_indentation;
+use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextRange, TextSize};
+
+use is_macro;
+use natord;
+
+/// An enumeration of the different sorting styles
+/// currently supported for displays of string literals
+#[derive(Debug, Clone, Copy)]
+pub(super) enum SortingStyle {
+    /// Sort string-literal items according to a
+    /// [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order).
+    Natural,
+    /// Sort string-literal items "isort-style".
+    ///
+    /// An isort-style sort orders items first according to their casing:
+    /// SCREAMING_SNAKE_CASE names (conventionally used for global constants)
+    /// come first, followed by CamelCase names (conventionally used for
+    /// classes), followed by anything else. Within each category,
+    /// a [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order)
+    /// is used to order the elements.
+    Isort,
+}
+
+impl SortingStyle {
+    pub(super) fn compare(self, a: &str, b: &str) -> Ordering {
+        match self {
+            Self::Natural => natord::compare(a, b),
+            Self::Isort => IsortSortKey::from(a).cmp(&IsortSortKey::from(b)),
+        }
+    }
+}
+
+/// A struct to implement logic necessary to achieve
+/// an "isort-style sort".
+///
+/// An isort-style sort sorts items first according to their casing:
+/// `SCREAMING_SNAKE_CASE` names (conventionally used for global constants)
+/// come first, followed by CamelCase names (conventionally used for
+/// classes), followed by anything else. Within each category,
+/// a [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order)
+/// is used to order the elements.
+struct IsortSortKey<'a> {
+    category: InferredMemberType,
+    value: &'a str,
+}
+
+impl Ord for IsortSortKey<'_> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.category
+            .cmp(&other.category)
+            .then_with(|| natord::compare(self.value, other.value))
+    }
+}
+
+impl PartialOrd for IsortSortKey<'_> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl PartialEq for IsortSortKey<'_> {
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl Eq for IsortSortKey<'_> {}
+
+impl<'a> From<&'a str> for IsortSortKey<'a> {
+    fn from(value: &'a str) -> Self {
+        Self {
+            category: InferredMemberType::of(value),
+            value,
+        }
+    }
+}
+
+/// Classification for the casing of an element in a
+/// sequence of literal strings.
+///
+/// This is necessary to achieve an "isort-style" sort,
+/// where elements are sorted first by category,
+/// then, within categories, are sorted according
+/// to a natural sort.
+///
+/// You'll notice that a very similar enum exists
+/// in ruff's reimplementation of isort.
+#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone, Copy)]
+enum InferredMemberType {
+    Constant,
+    Class,
+    Other,
+}
+
+impl InferredMemberType {
+    fn of(value: &str) -> Self {
+        // E.g. `CONSTANT`
+        if value.len() > 1 && is_cased_uppercase(value) {
+            Self::Constant
+        // E.g. `Class`
+        } else if value.starts_with(char::is_uppercase) {
+            Self::Class
+        // E.g. `some_variable` or `some_function`
+        } else {
+            Self::Other
+        }
+    }
+}
+
+/// An enumeration of the various kinds of sequences for which Python has
+/// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries).
+///
+/// (I'm aware a set isn't actually a "sequence",
+/// *but* for our purposes it's conceptually a sequence,
+/// since in terms of the AST structure it's almost identical
+/// to tuples/lists.)
+///
+/// Whereas list, dict and set literals are always parenthesized
+/// (e.g. lists always start with `[` and end with `]`),
+/// single-line tuple literals *can* be unparenthesized.
+/// We keep the original AST node around for the
+/// Tuple variant so that this can be queried later.
+#[derive(Debug)]
+pub(super) enum SequenceKind<'a> {
+    List,
+    Set,
+    Tuple(&'a ast::ExprTuple),
+}
+
+impl SequenceKind<'_> {
+    // N.B. We only need the source code for the Tuple variant here,
+    // but if you already have a `Locator` instance handy,
+    // getting the source code is very cheap.
+    fn surrounding_brackets(&self, source: &str) -> (&'static str, &'static str) {
+        match self {
+            Self::List => ("[", "]"),
+            Self::Set => ("{", "}"),
+            Self::Tuple(ast_node) => {
+                if ast_node.is_parenthesized(source) {
+                    ("(", ")")
+                } else {
+                    ("", "")
+                }
+            }
+        }
+    }
+
+    const fn opening_token_for_multiline_definition(&self) -> TokenKind {
+        match self {
+            Self::List => TokenKind::Lsqb,
+            Self::Set => TokenKind::Lbrace,
+            Self::Tuple(_) => TokenKind::Lpar,
+        }
+    }
+
+    const fn closing_token_for_multiline_definition(&self) -> TokenKind {
+        match self {
+            Self::List => TokenKind::Rsqb,
+            Self::Set => TokenKind::Rbrace,
+            Self::Tuple(_) => TokenKind::Rpar,
+        }
+    }
+}
+
+/// A newtype that zips together the string values of a display literal's elts,
+/// together with the original AST nodes for that display literal's elts.
+///
+/// The main purpose of separating this out into a separate struct
+/// is to enforce the invariants that:
+///
+/// 1. The two iterables that are zipped together have the same length; and,
+/// 2. The length of both iterables is >= 2
+struct SequenceElements<'a>(Vec<(&'a &'a str, &'a ast::Expr)>);
+
+impl<'a> SequenceElements<'a> {
+    fn new(elements: &'a [&str], elts: &'a [ast::Expr]) -> Self {
+        assert_eq!(elements.len(), elts.len());
+        assert!(
+            elements.len() >= 2,
+            "A sequence with < 2 elements cannot be unsorted"
+        );
+        Self(elements.iter().zip(elts).collect())
+    }
+
+    fn last_item_index(&self) -> usize {
+        // Safe from underflow, as the constructor guarantees
+        // that the underlying vector has length >= 2
+        self.0.len() - 1
+    }
+
+    fn into_sorted_elts(
+        mut self,
+        sorting_style: SortingStyle,
+    ) -> impl Iterator<Item = &'a ast::Expr> {
+        self.0
+            .sort_by(|(elem1, _), (elem2, _)| sorting_style.compare(elem1, elem2));
+        self.0.into_iter().map(|(_, elt)| elt)
+    }
+}
+
+/// Create a string representing a fixed-up single-line
+/// definition of `__all__` or `__slots__` (etc.),
+/// that can be inserted into the
+/// source code as a `range_replacement` autofix.
+pub(super) fn sort_single_line_elements_sequence(
+    kind: &SequenceKind,
+    elts: &[ast::Expr],
+    elements: &[&str],
+    locator: &Locator,
+    sorting_style: SortingStyle,
+) -> String {
+    let element_pairs = SequenceElements::new(elements, elts);
+    let last_item_index = element_pairs.last_item_index();
+    let (opening_paren, closing_paren) = kind.surrounding_brackets(locator.contents());
+    let mut result = String::from(opening_paren);
+    // We grab the original source-code ranges using `locator.slice()`
+    // rather than using the expression generator, as this approach allows
+    // us to easily preserve stylistic choices in the original source code
+    // such as whether double or single quotes were used.
+    for (i, elt) in element_pairs.into_sorted_elts(sorting_style).enumerate() {
+        result.push_str(locator.slice(elt));
+        if i < last_item_index {
+            result.push_str(", ");
+        }
+    }
+    result.push_str(closing_paren);
+    result
+}
+
+/// An enumeration of the possible conclusions we could come to
+/// regarding the ordering of the elements in a display of string literals
+#[derive(Debug, is_macro::Is)]
+pub(super) enum SortClassification<'a> {
+    /// It's a display of string literals that is already sorted
+    Sorted,
+    /// It's an unsorted display of string literals,
+    /// but we wouldn't be able to autofix it
+    UnsortedButUnfixable,
+    /// It's an unsorted display of string literals,
+    /// and it's possible we could generate a fix for it;
+    /// here's the values of the elts so we can use them to
+    /// generate an autofix:
+    UnsortedAndMaybeFixable { items: Vec<&'a str> },
+    /// The display contains one or more items that are not string
+    /// literals.
+    NotAListOfStringLiterals,
+}
+
+impl<'a> SortClassification<'a> {
+    pub(super) fn of_elements(elements: &'a [ast::Expr], sorting_style: SortingStyle) -> Self {
+        // If it's of length less than 2, it has to be sorted already
+        let Some((first, rest @ [_, ..])) = elements.split_first() else {
+            return Self::Sorted;
+        };
+
+        // If any elt we encounter is not an ExprStringLiteral AST node,
+        // that indicates at least one item in the sequence is not a string literal,
+        // which means the sequence is out of scope for RUF022/RUF023/etc.
+        let Some(string_node) = first.as_string_literal_expr() else {
+            return Self::NotAListOfStringLiterals;
+        };
+        let mut current = string_node.value.to_str();
+
+        for expr in rest {
+            let Some(string_node) = expr.as_string_literal_expr() else {
+                return Self::NotAListOfStringLiterals;
+            };
+            let next = string_node.value.to_str();
+            if sorting_style.compare(next, current).is_lt() {
+                // Looks like the sequence was not in fact already sorted!
+                //
+                // Now we need to gather the necessary information we'd need
+                // to create an autofix. We need to know three things for this:
+                //
+                // 1. Are all items in the sequence string literals?
+                //    (If not, we won't even be emitting the violation, let alone
+                //    trying to fix it.)
+                // 2. Are any items in the sequence implicitly concatenated?
+                //    (If so, we might be *emitting* the violation, but we definitely
+                //    won't be trying to fix it.)
+                // 3. What is the value of each elt in the sequence?
+                let mut items = Vec::with_capacity(elements.len());
+                let mut any_implicit_concatenation = false;
+                for expr in elements {
+                    let Some(string_node) = expr.as_string_literal_expr() else {
+                        return Self::NotAListOfStringLiterals;
+                    };
+                    any_implicit_concatenation |= string_node.value.is_implicit_concatenated();
+                    items.push(string_node.value.to_str());
+                }
+                if any_implicit_concatenation {
+                    return Self::UnsortedButUnfixable;
+                }
+                return Self::UnsortedAndMaybeFixable { items };
+            }
+            current = next;
+        }
+        // Looks like the sequence was already sorted -- hooray!
+        // We won't be emitting a violation this time.
+        Self::Sorted
+    }
+}
+
+/// An instance of this struct encapsulates an analysis
+/// of a multiline Python tuple/list that represents an
+/// `__all__`/`__slots__`/etc. definition or augmentation.
+pub(super) struct MultilineStringSequenceValue {
+    items: Vec<StringSequenceItem>,
+    range: TextRange,
+    ends_with_trailing_comma: bool,
+}
+
+impl MultilineStringSequenceValue {
+    pub(super) fn len(&self) -> usize {
+        self.items.len()
+    }
+
+    /// Analyse the source range for a multiline Python tuple/list that
+    /// represents an `__all__`/`__slots__`/etc. definition or augmentation.
+    /// Return `None` if the analysis fails for whatever reason.
+    pub(super) fn from_source_range(
+        range: TextRange,
+        kind: &SequenceKind,
+        locator: &Locator,
+    ) -> Option<MultilineStringSequenceValue> {
+        // Parse the multiline string sequence using the raw tokens.
+        // See the docs for `collect_string_sequence_lines()` for why we have to
+        // use the raw tokens, rather than just the AST, to do this parsing.
+        //
+        // Step (1). Start by collecting information on each line individually:
+        let (lines, ends_with_trailing_comma) =
+            collect_string_sequence_lines(range, kind, locator)?;
+
+        // Step (2). Group lines together into sortable "items":
+        //   - Any "item" contains a single element of the list/tuple
+        //   - Assume that any comments on their own line are meant to be grouped
+        //     with the element immediately below them: if the element moves,
+        //     the comments above the element move with it.
+        //   - The same goes for any comments on the same line as an element:
+        //     if the element moves, the comment moves with it.
+        let items = collect_string_sequence_items(lines, range, locator);
+
+        Some(MultilineStringSequenceValue {
+            items,
+            range,
+            ends_with_trailing_comma,
+        })
+    }
+
+    /// Sort a multiline sequence of literal strings
+    /// that is known to be unsorted.
+    ///
+    /// This function panics if it is called and `self.items`
+    /// has length < 2. It's redundant to call this method in this case,
+    /// since lists with < 2 items cannot be unsorted,
+    /// so this is a logic error.
+    pub(super) fn into_sorted_source_code(
+        mut self,
+        sorting_style: SortingStyle,
+        locator: &Locator,
+        stylist: &Stylist,
+    ) -> String {
+        let (first_item_start, last_item_end) = match self.items.as_slice() {
+            [first_item, .., last_item] => (first_item.start(), last_item.end()),
+            _ => panic!(
+                "We shouldn't be attempting an autofix if a sequence has < 2 elements;
+                a sequence with 1 or 0 elements cannot be unsorted."
+            ),
+        };
+
+        // As well as the "items" in a multiline string sequence,
+        // there is also a "prelude" and a "postlude":
+        //  - Prelude == the region of source code from the opening parenthesis,
+        //    up to the start of the first item in `__all__`/`__slots__`/etc.
+        //  - Postlude == the region of source code from the end of the last
+        //    item in `__all__`/`__slots__`/etc. up to and including the closing
+        //    parenthesis.
+        //
+        // For example:
+        //
+        // ```python
+        // __all__ = [  # comment0
+        //     # comment1
+        //     "first item",
+        //     "last item"  # comment2
+        //     # comment3
+        // ]  # comment4
+        // ```
+        //
+        // - The prelude in the above example is the source code region
+        //   starting just before the opening `[` and ending just after `# comment0`.
+        //   `comment0` here counts as part of the prelude because it is on
+        //   the same line as the opening paren, and because we haven't encountered
+        //   any elements of `__all__` yet, but `comment1` counts as part of the first item,
+        //   as it's on its own line, and all comments on their own line are grouped
+        //   with the next element below them to make "items",
+        //   (an "item" being a region of source code that all moves as one unit
+        //   when `__all__` is sorted).
+        // - The postlude in the above example is the source code region starting
+        //   just after `# comment2` and ending just after the closing paren.
+        //   `# comment2` is part of the last item, as it's an inline comment on the
+        //   same line as an element, but `# comment3` becomes part of the postlude
+        //   because there are no items below it. `# comment4` is not part of the
+        //   postlude: it's outside of the source-code range considered by this rule,
+        //   and should therefore be untouched.
+        //
+        let newline = stylist.line_ending().as_str();
+        let start_offset = self.start();
+        let leading_indent = leading_indentation(locator.full_line(start_offset));
+        let item_indent = format!("{}{}", leading_indent, stylist.indentation().as_str());
+
+        let prelude =
+            multiline_string_sequence_prelude(first_item_start, newline, start_offset, locator);
+        let postlude = multiline_string_sequence_postlude(
+            last_item_end,
+            newline,
+            leading_indent,
+            &item_indent,
+            self.end(),
+            locator,
+        );
+
+        self.items
+            .sort_by(|a, b| sorting_style.compare(&a.value, &b.value));
+        let joined_items = join_multiline_string_sequence_items(
+            &self.items,
+            locator,
+            &item_indent,
+            newline,
+            self.ends_with_trailing_comma,
+        );
+
+        format!("{prelude}{joined_items}{postlude}")
+    }
+}
+
+impl Ranged for MultilineStringSequenceValue {
+    fn range(&self) -> TextRange {
+        self.range
+    }
+}
+
+/// Collect data on each line of a multiline string sequence.
+/// Return `None` if the sequence appears to be invalid,
+/// or if it's an edge case we don't support.
+///
+/// Why do we need to do this using the raw tokens,
+/// when we already have the AST? The AST strips out
+/// crucial information that we need to track here for
+/// a multiline string sequence, such as:
+/// - The value of comments
+/// - The amount of whitespace between the end of a line
+///   and an inline comment
+/// - Whether or not the final item in the tuple/list has a
+///   trailing comma
+///
+/// All of this information is necessary to have at a later
+/// stage if we're to sort items without doing unnecessary
+/// brutality to the comments and pre-existing style choices
+/// in the original source code.
+fn collect_string_sequence_lines(
+    range: TextRange,
+    kind: &SequenceKind,
+    locator: &Locator,
+) -> Option<(Vec<StringSequenceLine>, bool)> {
+    // These first two variables are used for keeping track of state
+    // regarding the entirety of the string sequence...
+    let mut ends_with_trailing_comma = false;
+    let mut lines = vec![];
+    // ... all state regarding a single line of a string sequence
+    // is encapsulated in this variable
+    let mut line_state = LineState::default();
+
+    // `lex_starts_at()` gives us absolute ranges rather than relative ranges,
+    // but (surprisingly) we still need to pass in the slice of code we want it to lex,
+    // rather than the whole source file:
+    let mut token_iter =
+        lexer::lex_starts_at(locator.slice(range), Mode::Expression, range.start());
+    let (first_tok, _) = token_iter.next()?.ok()?;
+    if TokenKind::from(&first_tok) != kind.opening_token_for_multiline_definition() {
+        return None;
+    }
+    let expected_final_token = kind.closing_token_for_multiline_definition();
+
+    for pair in token_iter {
+        let (tok, subrange) = pair.ok()?;
+        match tok {
+            Tok::NonLogicalNewline => {
+                lines.push(line_state.into_string_sequence_line());
+                line_state = LineState::default();
+            }
+            Tok::Comment(_) => {
+                line_state.visit_comment_token(subrange);
+            }
+            Tok::String { value, .. } => {
+                line_state.visit_string_token(value, subrange);
+                ends_with_trailing_comma = false;
+            }
+            Tok::Comma => {
+                line_state.visit_comma_token(subrange);
+                ends_with_trailing_comma = true;
+            }
+            tok if TokenKind::from(&tok) == expected_final_token => {
+                lines.push(line_state.into_string_sequence_line());
+                break;
+            }
+            _ => return None,
+        }
+    }
+    Some((lines, ends_with_trailing_comma))
+}
+
+/// This struct is for keeping track of state
+/// regarding a single line in a multiline string sequence.
+/// It is purely internal to `collect_string_sequence_lines()`,
+/// and should not be used outside that function.
+///
+/// There are three possible kinds of line in a multiline
+/// string sequence, and we don't know what kind of a line
+/// we're in until all tokens in that line have been processed:
+///
+/// - A line with just a comment
+///   (`StringSequenceLine::JustAComment`)
+/// - A line with one or more string items in it
+///   (`StringSequenceLine::OneOrMoreItems`)
+/// - An empty line (`StringSequenceLine::Empty`)
+///
+/// As we process the tokens in a single line,
+/// this struct accumulates the necessary state for us
+/// to be able to determine what kind of a line we're in.
+/// Once the entire line has been processed,
+/// `into_string_sequence_line()` is called, which consumes
+/// `self` and produces the classification for the line.
+#[derive(Debug, Default)]
+struct LineState {
+    first_item_in_line: Option<(String, TextRange)>,
+    following_items_in_line: Vec<(String, TextRange)>,
+    comment_range_start: Option<TextSize>,
+    comment_in_line: Option<TextRange>,
+}
+
+impl LineState {
+    fn visit_string_token(&mut self, token_value: String, token_range: TextRange) {
+        if self.first_item_in_line.is_none() {
+            self.first_item_in_line = Some((token_value, token_range));
+        } else {
+            self.following_items_in_line
+                .push((token_value, token_range));
+        }
+        self.comment_range_start = Some(token_range.end());
+    }
+
+    fn visit_comma_token(&mut self, token_range: TextRange) {
+        self.comment_range_start = Some(token_range.end());
+    }
+
+    /// If this is a comment on its own line,
+    /// record the range of that comment.
+    ///
+    /// *If*, however, we've already seen a comma
+    /// or a string in this line, that means that we're
+    /// in a line with items. In that case, we want to
+    /// record the range of the comment, *plus* the whitespace
+    /// (if any) preceding the comment. This is so that we don't
+    /// unnecessarily apply opinionated formatting changes
+    /// where they might not be welcome.
+    fn visit_comment_token(&mut self, token_range: TextRange) {
+        self.comment_in_line = {
+            if let Some(comment_range_start) = self.comment_range_start {
+                Some(TextRange::new(comment_range_start, token_range.end()))
+            } else {
+                Some(token_range)
+            }
+        }
+    }
+
+    fn into_string_sequence_line(self) -> StringSequenceLine {
+        if let Some(first_item) = self.first_item_in_line {
+            StringSequenceLine::OneOrMoreItems(LineWithItems {
+                first_item,
+                following_items: self.following_items_in_line,
+                trailing_comment_range: self.comment_in_line,
+            })
+        } else {
+            self.comment_in_line
+                .map_or(StringSequenceLine::Empty, |comment_range| {
+                    StringSequenceLine::JustAComment(LineWithJustAComment(comment_range))
+                })
+        }
+    }
+}
+
+/// Instances of this struct represent source-code lines in the middle
+/// of multiline tuples/lists/sets where the line contains
+/// 0 elements of the sequence, but the line does have a comment in it.
+#[derive(Debug)]
+struct LineWithJustAComment(TextRange);
+
+/// Instances of this struct represent source-code lines in
+/// multiline tuples/lists/sets where the line contains at least
+/// 1 element of the sequence. The line may contain > 1 element of the
+/// sequence, and may also have a trailing comment after the element(s).
+#[derive(Debug)]
+struct LineWithItems {
+    // For elements in the list, we keep track of the value
+    // of the element as well as the source-code range of the element.
+    // (We need to know the actual value so that we can sort the items.)
+    first_item: (String, TextRange),
+    following_items: Vec<(String, TextRange)>,
+    // For comments, we only need to keep track of the source-code range.
+    trailing_comment_range: Option<TextRange>,
+}
+
+impl LineWithItems {
+    fn num_items(&self) -> usize {
+        self.following_items.len() + 1
+    }
+}
+
+/// An enumeration of the possible kinds of source-code lines
+/// that can exist in a multiline string sequence:
+///
+/// - A line that has no string elements, but does have a comment.
+/// - A line that has one or more string elements,
+///   and may also have a trailing comment.
+/// - An entirely empty line.
+#[derive(Debug)]
+enum StringSequenceLine {
+    JustAComment(LineWithJustAComment),
+    OneOrMoreItems(LineWithItems),
+    Empty,
+}
+
+/// Given data on each line in a multiline string sequence,
+/// group lines together into "items".
+///
+/// Each item contains exactly one string element,
+/// but might contain multiple comments attached to that element
+/// that must move with the element when the string sequence is sorted.
+///
+/// Note that any comments following the last item are discarded here,
+/// but that doesn't matter: we add them back in `into_sorted_source_code()`
+/// as part of the `postlude` (see comments in that function)
+fn collect_string_sequence_items(
+    lines: Vec<StringSequenceLine>,
+    dunder_all_range: TextRange,
+    locator: &Locator,
+) -> Vec<StringSequenceItem> {
+    let mut all_items = Vec::with_capacity(match lines.as_slice() {
+        [StringSequenceLine::OneOrMoreItems(single)] => single.num_items(),
+        _ => lines.len(),
+    });
+    let mut first_item_encountered = false;
+    let mut preceding_comment_ranges = vec![];
+    for line in lines {
+        match line {
+            StringSequenceLine::JustAComment(LineWithJustAComment(comment_range)) => {
+                // Comments on the same line as the opening paren and before any elements
+                // count as part of the "prelude"; these are not grouped into any item...
+                if first_item_encountered
+                    || locator.line_start(comment_range.start())
+                        != locator.line_start(dunder_all_range.start())
+                {
+                    // ...but for all other comments that precede an element,
+                    // group the comment with the element following that comment
+                    // into an "item", so that the comment moves as one with the element
+                    // when the list/tuple/set is sorted
+                    preceding_comment_ranges.push(comment_range);
+                }
+            }
+            StringSequenceLine::OneOrMoreItems(LineWithItems {
+                first_item: (first_val, first_range),
+                following_items,
+                trailing_comment_range: comment_range,
+            }) => {
+                first_item_encountered = true;
+                all_items.push(StringSequenceItem::new(
+                    first_val,
+                    std::mem::take(&mut preceding_comment_ranges),
+                    first_range,
+                    comment_range,
+                ));
+                for (value, range) in following_items {
+                    all_items.push(StringSequenceItem::with_no_comments(value, range));
+                }
+            }
+            StringSequenceLine::Empty => continue, // discard empty lines
+        }
+    }
+    all_items
+}
+
+/// An instance of this struct represents a single element
+/// from a multiline string sequence, *and* any comments that
+/// are "attached" to it. The comments "attached" to the element
+/// will move with the element when the tuple/list/set is sorted.
+///
+/// Comments on their own line immediately preceding the element will
+/// always form a contiguous range with the range of the element itself;
+/// however, inline comments won't necessarily form a contiguous range.
+/// Consider the following scenario, where both `# comment0` and `# comment1`
+/// will move with the "a" element when the list is sorted:
+///
+/// ```python
+/// __all__ = [
+///     "b",
+///     # comment0
+///     "a", "c",  # comment1
+/// ]
+/// ```
+///
+/// The desired outcome here is:
+///
+/// ```python
+/// __all__ = [
+///     # comment0
+///     "a",  # comment1
+///     "b",
+///     "c",
+/// ]
+/// ```
+///
+/// To achieve this, both `# comment0` and `# comment1`
+/// are grouped into the `StringSequenceItem` instance
+/// where the value is `"a"`, even though the source-code range
+/// of `# comment1` does not form a contiguous range with the
+/// source-code range of `"a"`.
+#[derive(Debug)]
+struct StringSequenceItem {
+    value: String,
+    preceding_comment_ranges: Vec<TextRange>,
+    element_range: TextRange,
+    // total_range incorporates the ranges of preceding comments
+    // (which must be contiguous with the element),
+    // but doesn't incorporate any trailing comments
+    // (which might be contiguous, but also might not be)
+    total_range: TextRange,
+    end_of_line_comments: Option<TextRange>,
+}
+
+impl StringSequenceItem {
+    fn new(
+        value: String,
+        preceding_comment_ranges: Vec<TextRange>,
+        element_range: TextRange,
+        end_of_line_comments: Option<TextRange>,
+    ) -> Self {
+        let total_range = {
+            if let Some(first_comment_range) = preceding_comment_ranges.first() {
+                TextRange::new(first_comment_range.start(), element_range.end())
+            } else {
+                element_range
+            }
+        };
+        Self {
+            value,
+            preceding_comment_ranges,
+            element_range,
+            total_range,
+            end_of_line_comments,
+        }
+    }
+
+    fn with_no_comments(value: String, element_range: TextRange) -> Self {
+        Self::new(value, vec![], element_range, None)
+    }
+}
+
+impl Ranged for StringSequenceItem {
+    fn range(&self) -> TextRange {
+        self.total_range
+    }
+}
+
+/// Return a string representing the "prelude" for a
+/// multiline string sequence.
+///
+/// See inline comments in
+/// `MultilineStringSequenceValue::into_sorted_source_code()`
+/// for a definition of the term "prelude" in this context.
+fn multiline_string_sequence_prelude<'a>( + first_item_start_offset: TextSize, + newline: &str, + dunder_all_offset: TextSize, + locator: &'a Locator, +) -> Cow<'a, str> { + let prelude_end = { + let first_item_line_offset = locator.line_start(first_item_start_offset); + if first_item_line_offset == locator.line_start(dunder_all_offset) { + first_item_start_offset + } else { + first_item_line_offset + } + }; + let prelude = locator.slice(TextRange::new(dunder_all_offset, prelude_end)); + if prelude.ends_with(['\r', '\n']) { + Cow::Borrowed(prelude) + } else { + Cow::Owned(format!("{}{}", prelude.trim_end(), newline)) + } +} + +/// Join the elements and comments of a multiline string sequence +/// definition into a single string. +/// +/// The resulting string does not include the "prelude" or +/// "postlude" of the tuple/set/list. +/// (See inline comments in +/// `MultilineStringSequence::into_sorted_source_code()` for +/// definitions of the terms "prelude" and "postlude" in this +/// context.) +fn join_multiline_string_sequence_items( + sorted_items: &[StringSequenceItem], + locator: &Locator, + item_indent: &str, + newline: &str, + needs_trailing_comma: bool, +) -> String { + assert!( + sorted_items.len() >= 2, + "A sequence with < 2 items cannot be unsorted" + ); + let last_item_index = sorted_items.len() - 1; + + let mut new_dunder_all = String::new(); + for (i, item) in sorted_items.iter().enumerate() { + let is_final_item = i == last_item_index; + for comment_range in &item.preceding_comment_ranges { + new_dunder_all.push_str(item_indent); + new_dunder_all.push_str(locator.slice(comment_range)); + new_dunder_all.push_str(newline); + } + new_dunder_all.push_str(item_indent); + new_dunder_all.push_str(locator.slice(item.element_range)); + if !is_final_item || needs_trailing_comma { + new_dunder_all.push(','); + } + if let Some(trailing_comments) = item.end_of_line_comments { + new_dunder_all.push_str(locator.slice(trailing_comments)); + } + if !is_final_item { + new_dunder_all.push_str(newline); + } + } + new_dunder_all +} + +/// Return a string representing the "postlude" for a +/// multiline string sequence. +/// +/// See inline comments in +/// `MultilineStringSequence::into_sorted_source_code()` +/// for a definition of the term "postlude" in this context. +fn multiline_string_sequence_postlude<'a>( + last_item_end_offset: TextSize, + newline: &str, + leading_indent: &str, + item_indent: &str, + dunder_all_range_end: TextSize, + locator: &'a Locator, +) -> Cow<'a, str> { + let postlude_start = { + let last_item_line_offset = locator.line_end(last_item_end_offset); + if last_item_line_offset == locator.line_end(dunder_all_range_end) { + last_item_end_offset + } else { + last_item_line_offset + } + }; + let postlude = locator.slice(TextRange::new(postlude_start, dunder_all_range_end)); + + // The rest of this function uses heuristics to + // avoid very long indents for the closing paren + // that don't match the style for the rest of the + // fixed-up multiline string sequence. + // + // For example, we want to avoid something like this + // (not uncommon in code that hasn't been + // autoformatted)... 
+ // + // ```python + // __all__ = ["xxxxxx", "yyyyyy", + // "aaaaaa", "bbbbbb", + // ] + // ``` + // + // ...getting autofixed to this: + // + // ```python + // __all__ = [ + // "a", + // "b", + // "x", + // "y", + // ] + // ``` + let newline_chars = ['\r', '\n']; + if !postlude.starts_with(newline_chars) { + return Cow::Borrowed(postlude); + } + if TextSize::of(leading_indentation( + postlude.trim_start_matches(newline_chars), + )) <= TextSize::of(item_indent) + { + return Cow::Borrowed(postlude); + } + let trimmed_postlude = postlude.trim_start(); + if trimmed_postlude.starts_with([']', ')']) { + return Cow::Owned(format!("{newline}{leading_indent}{trimmed_postlude}")); + } + Cow::Borrowed(postlude) +} diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 788b300dd56cc..341bff19c5bdf 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -1,27 +1,19 @@ -use std::borrow::Cow; -use std::cmp::Ordering; - use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; -use ruff_python_codegen::Stylist; -use ruff_python_parser::{lexer, Mode, Tok}; -use ruff_python_stdlib::str::is_cased_uppercase; -use ruff_python_trivia::leading_indentation; -use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextRange, TextSize}; +use ruff_text_size::TextRange; use crate::checkers::ast::Checker; - -use is_macro; -use itertools::Itertools; -use natord; +use crate::rules::ruff::rules::sequence_sorting::{ + sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind, + SortClassification, SortingStyle, +}; /// ## What it does /// Checks for `__all__` definitions that are not ordered /// according to an "isort-style" sort. /// -/// An isort-style sort sorts items first according to their casing: +/// An isort-style sort orders items first according to their casing: /// SCREAMING_SNAKE_CASE names (conventionally used for global constants) /// come first, followed by CamelCase names (conventionally used for /// classes), followed by anything else. Within each category, @@ -85,6 +77,8 @@ impl Violation for UnsortedDunderAll { } } +const SORTING_STYLE: SortingStyle = SortingStyle::Isort; + /// Sort an `__all__` definition represented by a `StmtAssign` AST node. /// For example: `__all__ = ["b", "c", "a"]`. pub(crate) fn sort_dunder_all_assign( @@ -157,21 +151,21 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) } let (elts, range, kind) = match node { - ast::Expr::List(ast::ExprList { elts, range, .. }) => (elts, *range, DunderAllKind::List), + ast::Expr::List(ast::ExprList { elts, range, .. }) => (elts, *range, SequenceKind::List), ast::Expr::Tuple(tuple_node @ ast::ExprTuple { elts, range, .. 
}) => { - (elts, *range, DunderAllKind::Tuple(tuple_node)) + (elts, *range, SequenceKind::Tuple(tuple_node)) } _ => return, }; - let elts_analysis = DunderAllSortClassification::from_elements(elts); + let elts_analysis = SortClassification::of_elements(elts, SORTING_STYLE); if elts_analysis.is_not_a_list_of_string_literals() || elts_analysis.is_sorted() { return; } let mut diagnostic = Diagnostic::new(UnsortedDunderAll, range); - if let DunderAllSortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { + if let SortClassification::UnsortedAndMaybeFixable { items } = elts_analysis { if let Some(fix) = create_fix(range, elts, &items, &kind, checker) { diagnostic.set_fix(fix); } @@ -180,176 +174,6 @@ fn sort_dunder_all(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) checker.diagnostics.push(diagnostic); } -/// An enumeration of the two valid ways of defining -/// `__all__`: as a list, or as a tuple. -/// -/// Whereas lists are always parenthesized -/// (they always start with `[` and end with `]`), -/// single-line tuples *can* be unparenthesized. -/// We keep the original AST node around for the -/// Tuple variant so that this can be queried later. -#[derive(Debug)] -enum DunderAllKind<'a> { - List, - Tuple(&'a ast::ExprTuple), -} - -impl DunderAllKind<'_> { - fn is_parenthesized(&self, source: &str) -> bool { - match self { - Self::List => true, - Self::Tuple(ast_node) => ast_node.is_parenthesized(source), - } - } - - fn opening_token_for_multiline_definition(&self) -> Tok { - match self { - Self::List => Tok::Lsqb, - Self::Tuple(_) => Tok::Lpar, - } - } - - fn closing_token_for_multiline_definition(&self) -> Tok { - match self { - Self::List => Tok::Rsqb, - Self::Tuple(_) => Tok::Rpar, - } - } -} - -/// An enumeration of the possible conclusions we could come to -/// regarding the ordering of the elements in an `__all__` definition: -/// -/// 1. `__all__` is a list of string literals that is already sorted -/// 2. `__all__` is an unsorted list of string literals, -/// but we wouldn't be able to autofix it -/// 3. `__all__` is an unsorted list of string literals, -/// and it's possible we could generate a fix for it -/// 4. `__all__` contains one or more items that are not string -/// literals. -/// -/// ("Sorted" here means "ordered according to an isort-style sort". 
-/// See the module-level docs for a definition of "isort-style sort.") -#[derive(Debug, is_macro::Is)] -enum DunderAllSortClassification<'a> { - Sorted, - UnsortedButUnfixable, - UnsortedAndMaybeFixable { items: Vec<&'a str> }, - NotAListOfStringLiterals, -} - -impl<'a> DunderAllSortClassification<'a> { - fn from_elements(elements: &'a [ast::Expr]) -> Self { - let Some((first, rest @ [_, ..])) = elements.split_first() else { - return Self::Sorted; - }; - let Some(string_node) = first.as_string_literal_expr() else { - return Self::NotAListOfStringLiterals; - }; - let mut this = string_node.value.to_str(); - - for expr in rest { - let Some(string_node) = expr.as_string_literal_expr() else { - return Self::NotAListOfStringLiterals; - }; - let next = string_node.value.to_str(); - if AllItemSortKey::from(next) < AllItemSortKey::from(this) { - let mut items = Vec::with_capacity(elements.len()); - for expr in elements { - let Some(string_node) = expr.as_string_literal_expr() else { - return Self::NotAListOfStringLiterals; - }; - if string_node.value.is_implicit_concatenated() { - return Self::UnsortedButUnfixable; - } - items.push(string_node.value.to_str()); - } - return Self::UnsortedAndMaybeFixable { items }; - } - this = next; - } - Self::Sorted - } -} - -/// A struct to implement logic necessary to achieve -/// an "isort-style sort". -/// -/// See the docs for this module as a whole for the -/// definition we use here of an "isort-style sort". -struct AllItemSortKey<'a> { - category: InferredMemberType, - value: &'a str, -} - -impl Ord for AllItemSortKey<'_> { - fn cmp(&self, other: &Self) -> Ordering { - self.category - .cmp(&other.category) - .then_with(|| natord::compare(self.value, other.value)) - } -} - -impl PartialOrd for AllItemSortKey<'_> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialEq for AllItemSortKey<'_> { - fn eq(&self, other: &Self) -> bool { - self.cmp(other) == Ordering::Equal - } -} - -impl Eq for AllItemSortKey<'_> {} - -impl<'a> From<&'a str> for AllItemSortKey<'a> { - fn from(value: &'a str) -> Self { - Self { - category: InferredMemberType::of(value), - value, - } - } -} - -impl<'a> From<&'a DunderAllItem> for AllItemSortKey<'a> { - fn from(item: &'a DunderAllItem) -> Self { - Self::from(item.value.as_str()) - } -} - -/// Classification for an element in `__all__`. -/// -/// This is necessary to achieve an "isort-style" sort, -/// where elements are sorted first by category, -/// then, within categories, are sorted according -/// to a natural sort. -/// -/// You'll notice that a very similar enum exists -/// in ruff's reimplementation of isort. -#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone, Copy)] -enum InferredMemberType { - Constant, - Class, - Other, -} - -impl InferredMemberType { - fn of(value: &str) -> Self { - // E.g. `CONSTANT` - if value.len() > 1 && is_cased_uppercase(value) { - Self::Constant - // E.g. `Class` - } else if value.starts_with(char::is_uppercase) { - Self::Class - // E.g. 
`some_variable` or `some_function` - } else { - Self::Other - } - } -} - /// Attempt to return `Some(fix)`, where `fix` is a `Fix` /// that can be set on the diagnostic to sort the user's /// `__all__` definition @@ -363,7 +187,7 @@ fn create_fix( range: TextRange, elts: &[ast::Expr], string_items: &[&str], - kind: &DunderAllKind, + kind: &SequenceKind, checker: &Checker, ) -> Option { let locator = checker.locator(); @@ -384,11 +208,11 @@ fn create_fix( // bare minimum of token-processing for single-line `__all__` // definitions: if is_multiline { - let value = MultilineDunderAllValue::from_source_range(range, kind, locator)?; - assert_eq!(value.items.len(), elts.len()); - value.into_sorted_source_code(locator, checker.stylist()) + let value = MultilineStringSequenceValue::from_source_range(range, kind, locator)?; + assert_eq!(value.len(), elts.len()); + value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) } else { - sort_single_line_dunder_all(elts, string_items, kind, locator) + sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE) } }; @@ -397,623 +221,3 @@ fn create_fix( range, ))) } - -/// An instance of this struct encapsulates an analysis -/// of a multiline Python tuple/list that represents an -/// `__all__` definition or augmentation. -struct MultilineDunderAllValue { - items: Vec, - range: TextRange, - ends_with_trailing_comma: bool, -} - -impl MultilineDunderAllValue { - /// Analyse the source range for a multiline Python tuple/list that - /// represents an `__all__` definition or augmentation. Return `None` - /// if the analysis fails for whatever reason. - fn from_source_range( - range: TextRange, - kind: &DunderAllKind, - locator: &Locator, - ) -> Option { - // Parse the multiline `__all__` definition using the raw tokens. - // See the docs for `collect_dunder_all_lines()` for why we have to - // use the raw tokens, rather than just the AST, to do this parsing. - // - // Step (1). Start by collecting information on each line individually: - let (lines, ends_with_trailing_comma) = collect_dunder_all_lines(range, kind, locator)?; - - // Step (2). Group lines together into sortable "items": - // - Any "item" contains a single element of the `__all__` list/tuple - // - Assume that any comments on their own line are meant to be grouped - // with the element immediately below them: if the element moves, - // the comments above the element move with it. - // - The same goes for any comments on the same line as an element: - // if the element moves, the comment moves with it. - let items = collect_dunder_all_items(lines, range, locator); - - Some(MultilineDunderAllValue { - items, - range, - ends_with_trailing_comma, - }) - } - - /// Sort a multiline `__all__` definition - /// that is known to be unsorted. - /// - /// Panics if this is called and `self.items` - /// has length < 2. It's redundant to call this method in this case, - /// since lists with < 2 items cannot be unsorted, - /// so this is a logic error. - fn into_sorted_source_code(mut self, locator: &Locator, stylist: &Stylist) -> String { - let (first_item_start, last_item_end) = match self.items.as_slice() { - [first_item, .., last_item] => (first_item.start(), last_item.end()), - _ => panic!( - "We shouldn't be attempting an autofix if `__all__` has < 2 elements; - an `__all__` definition with 1 or 0 elements cannot be unsorted." 
- ), - }; - - // As well as the "items" in the `__all__` definition, - // there is also a "prelude" and a "postlude": - // - Prelude == the region of source code from the opening parenthesis, - // up to the start of the first item in `__all__`. - // - Postlude == the region of source code from the end of the last - // item in `__all__` up to and including the closing parenthesis. - // - // For example: - // - // ```python - // __all__ = [ # comment0 - // # comment1 - // "first item", - // "last item" # comment2 - // # comment3 - // ] # comment4 - // ``` - // - // - The prelude in the above example is the source code region - // starting just before the opening `[` and ending just after `# comment0`. - // `comment0` here counts as part of the prelude because it is on - // the same line as the opening paren, and because we haven't encountered - // any elements of `__all__` yet, but `comment1` counts as part of the first item, - // as it's on its own line, and all comments on their own line are grouped - // with the next element below them to make "items", - // (an "item" being a region of source code that all moves as one unit - // when `__all__` is sorted). - // - The postlude in the above example is the source code region starting - // just after `# comment2` and ending just after the closing paren. - // `# comment2` is part of the last item, as it's an inline comment on the - // same line as an element, but `# comment3` becomes part of the postlude - // because there are no items below it. `# comment4` is not part of the - // postlude: it's outside of the source-code range considered by this rule, - // and should therefore be untouched. - // - let newline = stylist.line_ending().as_str(); - let start_offset = self.start(); - let leading_indent = leading_indentation(locator.full_line(start_offset)); - let item_indent = format!("{}{}", leading_indent, stylist.indentation().as_str()); - - let prelude = - multiline_dunder_all_prelude(first_item_start, newline, start_offset, locator); - let postlude = multiline_dunder_all_postlude( - last_item_end, - newline, - leading_indent, - &item_indent, - self.end(), - locator, - ); - - self.items - .sort_by(|this, next| AllItemSortKey::from(this).cmp(&AllItemSortKey::from(next))); - let joined_items = join_multiline_dunder_all_items( - &self.items, - locator, - &item_indent, - newline, - self.ends_with_trailing_comma, - ); - - format!("{prelude}{joined_items}{postlude}") - } -} - -impl Ranged for MultilineDunderAllValue { - fn range(&self) -> TextRange { - self.range - } -} - -/// Collect data on each line of a multiline `__all__` definition. -/// Return `None` if `__all__` appears to be invalid, -/// or if it's an edge case we don't support. -/// -/// Why do we need to do this using the raw tokens, -/// when we already have the AST? The AST strips out -/// crucial information that we need to track here for -/// a multiline `__all__` definition, such as: -/// - The value of comments -/// - The amount of whitespace between the end of a line -/// and an inline comment -/// - Whether or not the final item in the tuple/list has a -/// trailing comma -/// -/// All of this information is necessary to have at a later -/// stage if we're to sort items without doing unnecessary -/// brutality to the comments and pre-existing style choices -/// in the original source code. 
-fn collect_dunder_all_lines( - range: TextRange, - kind: &DunderAllKind, - locator: &Locator, -) -> Option<(Vec, bool)> { - // These first two variables are used for keeping track of state - // regarding the entirety of the `__all__` definition... - let mut ends_with_trailing_comma = false; - let mut lines = vec![]; - // ... all state regarding a single line of an `__all__` definition - // is encapsulated in this variable - let mut line_state = LineState::default(); - - // `lex_starts_at()` gives us absolute ranges rather than relative ranges, - // but (surprisingly) we still need to pass in the slice of code we want it to lex, - // rather than the whole source file: - let mut token_iter = - lexer::lex_starts_at(locator.slice(range), Mode::Expression, range.start()); - let (first_tok, _) = token_iter.next()?.ok()?; - if first_tok != kind.opening_token_for_multiline_definition() { - return None; - } - let expected_final_token = kind.closing_token_for_multiline_definition(); - - for pair in token_iter { - let (tok, subrange) = pair.ok()?; - match tok { - Tok::NonLogicalNewline => { - lines.push(line_state.into_dunder_all_line()); - line_state = LineState::default(); - } - Tok::Comment(_) => { - line_state.visit_comment_token(subrange); - } - Tok::String { value, .. } => { - line_state.visit_string_token(value, subrange); - ends_with_trailing_comma = false; - } - Tok::Comma => { - line_state.visit_comma_token(subrange); - ends_with_trailing_comma = true; - } - tok if tok == expected_final_token => { - lines.push(line_state.into_dunder_all_line()); - break; - } - _ => return None, - } - } - Some((lines, ends_with_trailing_comma)) -} - -/// This struct is for keeping track of state -/// regarding a single line in a multiline `__all__` definition. -/// It is purely internal to `collect_dunder_all_lines()`, -/// and should not be used outside that function. -/// -/// There are three possible kinds of line in a multiline -/// `__all__` definition, and we don't know what kind of a line -/// we're in until all tokens in that line have been processed: -/// -/// - A line with just a comment (`DunderAllLine::JustAComment)`) -/// - A line with one or more string items in it (`DunderAllLine::OneOrMoreItems`) -/// - An empty line (`DunderAllLine::Empty`) -/// -/// As we process the tokens in a single line, -/// this struct accumulates the necessary state for us -/// to be able to determine what kind of a line we're in. -/// Once the entire line has been processed, `into_dunder_all_line()` -/// is called, which consumes `self` and produces the -/// classification for the line. -#[derive(Debug, Default)] -struct LineState { - first_item_in_line: Option<(String, TextRange)>, - following_items_in_line: Vec<(String, TextRange)>, - comment_range_start: Option, - comment_in_line: Option, -} - -impl LineState { - fn visit_string_token(&mut self, token_value: String, token_range: TextRange) { - if self.first_item_in_line.is_none() { - self.first_item_in_line = Some((token_value, token_range)); - } else { - self.following_items_in_line - .push((token_value, token_range)); - } - self.comment_range_start = Some(token_range.end()); - } - - fn visit_comma_token(&mut self, token_range: TextRange) { - self.comment_range_start = Some(token_range.end()); - } - - /// If this is a comment on its own line, - /// record the range of that comment. - /// - /// *If*, however, we've already seen a comma - /// or a string in this line, that means that we're - /// in a line with items. 
In that case, we want to - /// record the range of the comment, *plus* the whitespace - /// (if any) preceding the comment. This is so that we don't - /// unnecessarily apply opinionated formatting changes - /// where they might not be welcome. - fn visit_comment_token(&mut self, token_range: TextRange) { - self.comment_in_line = { - if let Some(comment_range_start) = self.comment_range_start { - Some(TextRange::new(comment_range_start, token_range.end())) - } else { - Some(token_range) - } - } - } - - fn into_dunder_all_line(self) -> DunderAllLine { - if let Some(first_item) = self.first_item_in_line { - DunderAllLine::OneOrMoreItems(LineWithItems { - first_item, - following_items: self.following_items_in_line, - trailing_comment_range: self.comment_in_line, - }) - } else { - self.comment_in_line - .map_or(DunderAllLine::Empty, |comment_range| { - DunderAllLine::JustAComment(LineWithJustAComment(comment_range)) - }) - } - } -} - -/// Instances of this struct represent source-code lines in the middle -/// of multiline `__all__` tuples/lists where the line contains -/// 0 elements of the tuple/list, but the line does have a comment in it. -#[derive(Debug)] -struct LineWithJustAComment(TextRange); - -/// Instances of this struct represent source-code lines in single-line -/// or multiline `__all__` tuples/lists where the line contains at least -/// 1 element of the tuple/list. The line may contain > 1 element of the -/// tuple/list, and may also have a trailing comment after the element(s). -#[derive(Debug)] -struct LineWithItems { - // For elements in the list, we keep track of the value of the - // value of the element as well as the source-code range of the element. - // (We need to know the actual value so that we can sort the items.) - first_item: (String, TextRange), - following_items: Vec<(String, TextRange)>, - // For comments, we only need to keep track of the source-code range. - trailing_comment_range: Option, -} - -impl LineWithItems { - fn num_items(&self) -> usize { - self.following_items.len() + 1 - } -} - -/// An enumeration of the possible kinds of source-code lines -/// that can exist in a multiline `__all__` tuple or list: -/// -/// - A line that has no string elements, but does have a comment. -/// - A line that has one or more string elements, -/// and may also have a trailing comment. -/// - An entirely empty line. -#[derive(Debug)] -enum DunderAllLine { - JustAComment(LineWithJustAComment), - OneOrMoreItems(LineWithItems), - Empty, -} - -/// Given data on each line in a multiline `__all__` definition, -/// group lines together into "items". -/// -/// Each item contains exactly one string element, -/// but might contain multiple comments attached to that element -/// that must move with the element when `__all__` is sorted. 
-/// -/// Note that any comments following the last item are discarded here, -/// but that doesn't matter: we add them back in `into_sorted_source_code()` -/// as part of the `postlude` (see comments in that function) -fn collect_dunder_all_items( - lines: Vec, - dunder_all_range: TextRange, - locator: &Locator, -) -> Vec { - let mut all_items = Vec::with_capacity(match lines.as_slice() { - [DunderAllLine::OneOrMoreItems(single)] => single.num_items(), - _ => lines.len(), - }); - let mut first_item_encountered = false; - let mut preceding_comment_ranges = vec![]; - for line in lines { - match line { - DunderAllLine::JustAComment(LineWithJustAComment(comment_range)) => { - // Comments on the same line as the opening paren and before any elements - // count as part of the "prelude"; these are not grouped into any item... - if first_item_encountered - || locator.line_start(comment_range.start()) - != locator.line_start(dunder_all_range.start()) - { - // ...but for all other comments that precede an element, - // group the comment with the element following that comment - // into an "item", so that the comment moves as one with the element - // when the `__all__` list/tuple is sorted - preceding_comment_ranges.push(comment_range); - } - } - DunderAllLine::OneOrMoreItems(LineWithItems { - first_item: (first_val, first_range), - following_items, - trailing_comment_range: comment_range, - }) => { - first_item_encountered = true; - all_items.push(DunderAllItem::new( - first_val, - std::mem::take(&mut preceding_comment_ranges), - first_range, - comment_range, - )); - for (value, range) in following_items { - all_items.push(DunderAllItem::with_no_comments(value, range)); - } - } - DunderAllLine::Empty => continue, // discard empty lines - } - } - all_items -} - -/// An instance of this struct represents a single element -/// from a multiline `__all__` tuple/list, *and* any comments that -/// are "attached" to it. The comments "attached" to the element -/// will move with the element when the `__all__` tuple/list is sorted. -/// -/// Comments on their own line immediately preceding the element will -/// always form a contiguous range with the range of the element itself; -/// however, inline comments won't necessary form a contiguous range. -/// Consider the following scenario, where both `# comment0` and `# comment1` -/// will move with the "a" element when the list is sorted: -/// -/// ```python -/// __all__ = [ -/// "b", -/// # comment0 -/// "a", "c", # comment1 -/// ] -/// ``` -/// -/// The desired outcome here is: -/// -/// ```python -/// __all__ = [ -/// # comment0 -/// "a", # comment1 -/// "b", -/// "c", -/// ] -/// ``` -/// -/// To achieve this, both `# comment0` and `# comment1` -/// are grouped into the `DunderAllItem` instance -/// where the value is `"a"`, even though the source-code range -/// of `# comment1` does not form a contiguous range with the -/// source-code range of `"a"`. 
-#[derive(Debug)] -struct DunderAllItem { - value: String, - preceding_comment_ranges: Vec, - element_range: TextRange, - // total_range incorporates the ranges of preceding comments - // (which must be contiguous with the element), - // but doesn't incorporate any trailing comments - // (which might be contiguous, but also might not be) - total_range: TextRange, - end_of_line_comments: Option, -} - -impl DunderAllItem { - fn new( - value: String, - preceding_comment_ranges: Vec, - element_range: TextRange, - end_of_line_comments: Option, - ) -> Self { - let total_range = { - if let Some(first_comment_range) = preceding_comment_ranges.first() { - TextRange::new(first_comment_range.start(), element_range.end()) - } else { - element_range - } - }; - Self { - value, - preceding_comment_ranges, - element_range, - total_range, - end_of_line_comments, - } - } - - fn with_no_comments(value: String, element_range: TextRange) -> Self { - Self::new(value, vec![], element_range, None) - } -} - -impl Ranged for DunderAllItem { - fn range(&self) -> TextRange { - self.total_range - } -} - -/// Return a string representing the "prelude" for a -/// multiline `__all__` definition. -/// -/// See inline comments in -/// `MultilineDunderAllValue::into_sorted_source_code()` -/// for a definition of the term "prelude" in this context. -fn multiline_dunder_all_prelude<'a>( - first_item_start_offset: TextSize, - newline: &str, - dunder_all_offset: TextSize, - locator: &'a Locator, -) -> Cow<'a, str> { - let prelude_end = { - let first_item_line_offset = locator.line_start(first_item_start_offset); - if first_item_line_offset == locator.line_start(dunder_all_offset) { - first_item_start_offset - } else { - first_item_line_offset - } - }; - let prelude = locator.slice(TextRange::new(dunder_all_offset, prelude_end)); - if prelude.ends_with(['\r', '\n']) { - Cow::Borrowed(prelude) - } else { - Cow::Owned(format!("{}{}", prelude.trim_end(), newline)) - } -} - -/// Join the elements and comments of a multiline `__all__` -/// definition into a single string. -/// -/// The resulting string does not include the "prelude" or -/// "postlude" of the `__all__` definition. -/// (See inline comments in `MultilineDunderAllValue::into_sorted_source_code()` -/// for definitions of the terms "prelude" and "postlude" -/// in this context.) -fn join_multiline_dunder_all_items( - sorted_items: &[DunderAllItem], - locator: &Locator, - item_indent: &str, - newline: &str, - needs_trailing_comma: bool, -) -> String { - let last_item_index = sorted_items.len() - 1; - - let mut new_dunder_all = String::new(); - for (i, item) in sorted_items.iter().enumerate() { - let is_final_item = i == last_item_index; - for comment_range in &item.preceding_comment_ranges { - new_dunder_all.push_str(item_indent); - new_dunder_all.push_str(locator.slice(comment_range)); - new_dunder_all.push_str(newline); - } - new_dunder_all.push_str(item_indent); - new_dunder_all.push_str(locator.slice(item.element_range)); - if !is_final_item || needs_trailing_comma { - new_dunder_all.push(','); - } - if let Some(trailing_comments) = item.end_of_line_comments { - new_dunder_all.push_str(locator.slice(trailing_comments)); - } - if !is_final_item { - new_dunder_all.push_str(newline); - } - } - new_dunder_all -} - -/// Return a string representing the "postlude" for a -/// multiline `__all__` definition. -/// -/// See inline comments in -/// `MultilineDunderAllValue::into_sorted_source_code()` -/// for a definition of the term "postlude" in this context. 
-fn multiline_dunder_all_postlude<'a>( - last_item_end_offset: TextSize, - newline: &str, - leading_indent: &str, - item_indent: &str, - dunder_all_range_end: TextSize, - locator: &'a Locator, -) -> Cow<'a, str> { - let postlude_start = { - let last_item_line_offset = locator.line_end(last_item_end_offset); - if last_item_line_offset == locator.line_end(dunder_all_range_end) { - last_item_end_offset - } else { - last_item_line_offset - } - }; - let postlude = locator.slice(TextRange::new(postlude_start, dunder_all_range_end)); - - // The rest of this function uses heuristics to - // avoid very long indents for the closing paren - // that don't match the style for the rest of the - // new `__all__` definition. - // - // For example, we want to avoid something like this - // (not uncommon in code that hasn't been - // autoformatted)... - // - // ```python - // __all__ = ["xxxxxx", "yyyyyy", - // "aaaaaa", "bbbbbb", - // ] - // ``` - // - // ...getting autofixed to this: - // - // ```python - // __all__ = [ - // "a", - // "b", - // "x", - // "y", - // ] - // ``` - let newline_chars = ['\r', '\n']; - if !postlude.starts_with(newline_chars) { - return Cow::Borrowed(postlude); - } - if TextSize::of(leading_indentation( - postlude.trim_start_matches(newline_chars), - )) <= TextSize::of(item_indent) - { - return Cow::Borrowed(postlude); - } - let trimmed_postlude = postlude.trim_start(); - if trimmed_postlude.starts_with([']', ')']) { - return Cow::Owned(format!("{newline}{leading_indent}{trimmed_postlude}")); - } - Cow::Borrowed(postlude) -} - -/// Create a string representing a fixed-up single-line -/// `__all__` definition, that can be inserted into the -/// source code as a `range_replacement` autofix. -fn sort_single_line_dunder_all( - elts: &[ast::Expr], - elements: &[&str], - kind: &DunderAllKind, - locator: &Locator, -) -> String { - // We grab the original source-code ranges using `locator.slice()` - // rather than using the expression generator, as this approach allows - // us to easily preserve stylistic choices in the original source code - // such as whether double or single quotes were used. 
-    let mut element_pairs = elts.iter().zip(elements).collect_vec();
-    element_pairs.sort_by_key(|(_, elem)| AllItemSortKey::from(**elem));
-    let joined_items = element_pairs
-        .iter()
-        .map(|(elt, _)| locator.slice(elt))
-        .join(", ");
-    match kind {
-        DunderAllKind::List => format!("[{joined_items}]"),
-        DunderAllKind::Tuple(_) if kind.is_parenthesized(locator.contents()) => {
-            format!("({joined_items})")
-        }
-        DunderAllKind::Tuple(_) => joined_items,
-    }
-}
diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs
new file mode 100644
index 0000000000000..bac21f6befe88
--- /dev/null
+++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs
@@ -0,0 +1,356 @@
+use std::borrow::Cow;
+use std::fmt::Display;
+
+use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
+use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast as ast;
+use ruff_python_semantic::ScopeKind;
+use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextRange};
+
+use crate::checkers::ast::Checker;
+use crate::rules::ruff::rules::sequence_sorting::{
+    sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind,
+    SortClassification, SortingStyle,
+};
+
+use itertools::izip;
+
+/// ## What it does
+/// Checks for `__slots__` and `__match_args__`
+/// definitions that are not ordered according to a
+/// [natural sort](https://en.wikipedia.org/wiki/Natural_sort_order).
+///
+/// ## Why is this bad?
+/// Consistency is good. Use a common convention for
+/// these special variables to make your code more
+/// readable and idiomatic.
+///
+/// ## Example
+/// ```python
+/// class Dog:
+///     __slots__ = "name", "breed"
+/// ```
+///
+/// Use instead:
+/// ```python
+/// class Dog:
+///     __slots__ = "breed", "name"
+/// ```
+#[violation]
+pub struct UnsortedDunderSlots {
+    class_name: String,
+    class_variable: SpecialClassDunder,
+}
+
+impl Violation for UnsortedDunderSlots {
+    const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;
+
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        let UnsortedDunderSlots {
+            class_name,
+            class_variable,
+        } = self;
+        format!("`{class_name}.{class_variable}` is not sorted")
+    }
+
+    fn fix_title(&self) -> Option<String> {
+        let UnsortedDunderSlots {
+            class_name,
+            class_variable,
+        } = self;
+        Some(format!(
+            "Apply a natural sort to `{class_name}.{class_variable}`"
+        ))
+    }
+}
+
+/// Enumeration of the two special class dunders
+/// that we're interested in for this rule: `__match_args__` and `__slots__`
+#[derive(Debug, Eq, PartialEq, Copy, Clone)]
+enum SpecialClassDunder {
+    Slots,
+    MatchArgs,
+}
+
+impl Display for SpecialClassDunder {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let string = match self {
+            Self::MatchArgs => "__match_args__",
+            Self::Slots => "__slots__",
+        };
+        write!(f, "{string}")
+    }
+}
+
+/// Sort a `__slots__`/`__match_args__` definition
+/// represented by a `StmtAssign` AST node.
+/// For example: `__slots__ = ["b", "c", "a"]`.
+pub(crate) fn sort_dunder_slots_assign(
+    checker: &mut Checker,
+    ast::StmtAssign { value, targets, .. }: &ast::StmtAssign,
+) {
+    if let [expr] = targets.as_slice() {
+        sort_dunder_slots(checker, expr, value);
+    }
+}
+
+/// Sort a `__slots__`/`__match_args__` definition
+/// represented by a `StmtAnnAssign` AST node.
+/// For example: `__slots__: list[str] = ["b", "c", "a"]`.
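+/// A fix would reorder that example to `__slots__: list[str] = ["a", "b", "c"]`.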
+pub(crate) fn sort_dunder_slots_ann_assign(checker: &mut Checker, node: &ast::StmtAnnAssign) {
+    if let Some(value) = &node.value {
+        sort_dunder_slots(checker, &node.target, value);
+    }
+}
+
+const SORTING_STYLE: SortingStyle = SortingStyle::Natural;
+
+/// Sort a tuple, list, dict or set that defines `__slots__`
+/// or `__match_args__` in a class scope.
+///
+/// This routine checks whether the display is sorted, and emits a
+/// violation if it is not sorted. If the tuple/list/set was not sorted,
+/// it attempts to set a `Fix` on the violation.
+fn sort_dunder_slots(checker: &mut Checker, target: &ast::Expr, node: &ast::Expr) {
+    let ast::Expr::Name(ast::ExprName { id, .. }) = target else {
+        return;
+    };
+
+    let dunder_kind = match id.as_str() {
+        "__slots__" => SpecialClassDunder::Slots,
+        "__match_args__" => SpecialClassDunder::MatchArgs,
+        _ => return,
+    };
+
+    // We're only interested in `__slots__`/`__match_args__` in the class scope
+    let ScopeKind::Class(ast::StmtClassDef {
+        name: class_name, ..
+    }) = checker.semantic().current_scope().kind
+    else {
+        return;
+    };
+
+    let Some(display) = StringLiteralDisplay::new(node, dunder_kind) else {
+        return;
+    };
+
+    let sort_classification = SortClassification::of_elements(&display.elts, SORTING_STYLE);
+    if sort_classification.is_not_a_list_of_string_literals() || sort_classification.is_sorted() {
+        return;
+    }
+
+    let mut diagnostic = Diagnostic::new(
+        UnsortedDunderSlots {
+            class_name: class_name.to_string(),
+            class_variable: dunder_kind,
+        },
+        display.range,
+    );
+
+    if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification {
+        if let Some(fix) = display.generate_fix(&items, checker) {
+            diagnostic.set_fix(fix);
+        }
+    }
+
+    checker.diagnostics.push(diagnostic);
+}
+
+/// Struct representing a [display](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries)
+/// of string literals.
+#[derive(Debug)]
+struct StringLiteralDisplay<'a> {
+    /// The elts from the original AST node representing the display.
+    /// Each elt is the AST representation of a single string literal
+    /// element in the display
+    elts: Cow<'a, Vec<ast::Expr>>,
+    /// The source-code range of the display as a whole
+    range: TextRange,
+    /// What kind of a display is it? A dict, set, list or tuple?
+    display_kind: DisplayKind<'a>,
+}
+
+impl Ranged for StringLiteralDisplay<'_> {
+    fn range(&self) -> TextRange {
+        self.range
+    }
+}
+
+impl<'a> StringLiteralDisplay<'a> {
+    fn new(node: &'a ast::Expr, dunder_kind: SpecialClassDunder) -> Option<Self> {
+        let result = match (dunder_kind, node) {
+            (_, ast::Expr::List(ast::ExprList { elts, range, .. })) => {
+                let display_kind = DisplayKind::Sequence(SequenceKind::List);
+                Self {
+                    elts: Cow::Borrowed(elts),
+                    range: *range,
+                    display_kind,
+                }
+            }
+            (_, ast::Expr::Tuple(tuple_node @ ast::ExprTuple { elts, range, .. })) => {
+                let display_kind = DisplayKind::Sequence(SequenceKind::Tuple(tuple_node));
+                Self {
+                    elts: Cow::Borrowed(elts),
+                    range: *range,
+                    display_kind,
+                }
+            }
+            (SpecialClassDunder::Slots, ast::Expr::Set(ast::ExprSet { elts, range })) => {
+                let display_kind = DisplayKind::Sequence(SequenceKind::Set);
+                Self {
+                    elts: Cow::Borrowed(elts),
+                    range: *range,
+                    display_kind,
+                }
+            }
+            (
+                SpecialClassDunder::Slots,
+                ast::Expr::Dict(ast::ExprDict {
+                    keys,
+                    values,
+                    range,
+                }),
+            ) => {
+                let mut narrowed_keys = Vec::with_capacity(values.len());
+                for key in keys {
+                    if let Some(key) = key {
+                        // This is somewhat unfortunate,
+                        // *but* only `__slots__` can be a dict out of
+                        // `__all__`, `__slots__` and `__match_args__`,
+                        // and even for `__slots__`, using a dict is very rare
+                        narrowed_keys.push(key.to_owned());
+                    } else {
+                        return None;
+                    }
+                }
+                // If `None` was present in the keys, it indicates a "** splat", e.g.
+                // `__slots__ = {"foo": "bar", **other_dict}`
+                // If `None` wasn't present in the keys,
+                // the length of the keys should always equal the length of the values
+                assert_eq!(narrowed_keys.len(), values.len());
+                let display_kind = DisplayKind::Dict { values };
+                Self {
+                    elts: Cow::Owned(narrowed_keys),
+                    range: *range,
+                    display_kind,
+                }
+            }
+            _ => return None,
+        };
+        Some(result)
+    }
+
+    fn generate_fix(&self, items: &[&str], checker: &Checker) -> Option<Fix> {
+        let locator = checker.locator();
+        let is_multiline = locator.contains_line_break(self.range());
+        let sorted_source_code = match (&self.display_kind, is_multiline) {
+            (DisplayKind::Sequence(sequence_kind), true) => {
+                let analyzed_sequence = MultilineStringSequenceValue::from_source_range(
+                    self.range(),
+                    sequence_kind,
+                    locator,
+                )?;
+                assert_eq!(analyzed_sequence.len(), self.elts.len());
+                analyzed_sequence.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist())
+            }
+            // Sorting multiline dicts is unsupported
+            (DisplayKind::Dict { .. }, true) => return None,
+            (DisplayKind::Sequence(sequence_kind), false) => sort_single_line_elements_sequence(
+                sequence_kind,
+                &self.elts,
+                items,
+                locator,
+                SORTING_STYLE,
+            ),
+            (DisplayKind::Dict { values }, false) => {
+                sort_single_line_elements_dict(&self.elts, items, values, locator)
+            }
+        };
+        Some(Fix::safe_edit(Edit::range_replacement(
+            sorted_source_code,
+            self.range,
+        )))
+    }
+}
+
+/// An enumeration of the various kinds of
+/// [display literals](https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries)
+/// Python provides for builtin containers.
+#[derive(Debug)]
+enum DisplayKind<'a> {
+    Sequence(SequenceKind<'a>),
+    Dict { values: &'a [ast::Expr] },
+}
+
+/// A newtype that zips together three iterables:
+///
+/// 1. The string values of a dict literal's keys;
+/// 2. The original AST nodes for the dict literal's keys; and,
+/// 3. The original AST nodes for the dict literal's values
+///
+/// The main purpose of separating this out into a separate struct
+/// is to enforce the invariants that:
+///
+/// 1. The three iterables that are zipped together have the same length; and,
+/// 2. The length of all three iterables is >= 2
+struct DictElements<'a>(Vec<(&'a &'a str, &'a ast::Expr, &'a ast::Expr)>);
+
+impl<'a> DictElements<'a> {
+    fn new(elements: &'a [&str], key_elts: &'a [ast::Expr], value_elts: &'a [ast::Expr]) -> Self {
+        assert_eq!(key_elts.len(), elements.len());
+        assert_eq!(elements.len(), value_elts.len());
+        assert!(
+            elements.len() >= 2,
+            "A sequence with < 2 elements cannot be unsorted"
+        );
+        Self(izip!(elements, key_elts, value_elts).collect())
+    }
+
+    fn last_item_index(&self) -> usize {
+        // Safe from underflow, as the constructor guarantees
+        // that the underlying vector has length >= 2
+        self.0.len() - 1
+    }
+
+    fn into_sorted_elts(mut self) -> impl Iterator<Item = (&'a ast::Expr, &'a ast::Expr)> {
+        self.0
+            .sort_by(|(elem1, _, _), (elem2, _, _)| SORTING_STYLE.compare(elem1, elem2));
+        self.0.into_iter().map(|(_, key, value)| (key, value))
+    }
+}
+
+/// Create a string representing a fixed-up single-line
+/// definition of a `__slots__` dictionary that can be
+/// inserted into the source code as a `range_replacement`
+/// autofix.
+///
+/// N.B. This function could potentially be moved into
+/// `sequence_sorting.rs` if any other modules need it,
+/// but stays here for now, since this is currently the
+/// only module that needs it
+fn sort_single_line_elements_dict(
+    key_elts: &[ast::Expr],
+    elements: &[&str],
+    value_elts: &[ast::Expr],
+    locator: &Locator,
+) -> String {
+    let element_trios = DictElements::new(elements, key_elts, value_elts);
+    let last_item_index = element_trios.last_item_index();
+    let mut result = String::from('{');
+    // We grab the original source-code ranges using `locator.slice()`
+    // rather than using the expression generator, as this approach allows
+    // us to easily preserve stylistic choices in the original source code
+    // such as whether double or single quotes were used.
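+    // For example, a hypothetical `__slots__ = {'b': 'B', 'a': 'A'}` would be
+    // rebuilt here as `{'a': 'A', 'b': 'B'}`, preserving the original quote style.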
+ for (i, (key, value)) in element_trios.into_sorted_elts().enumerate() { + result.push_str(locator.slice(key)); + result.push_str(": "); + result.push_str(locator.slice(value)); + if i < last_item_index { + result.push_str(", "); + } + } + result.push('}'); + result +} diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap new file mode 100644 index 0000000000000..3d6fa43fd1691 --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap @@ -0,0 +1,546 @@ +--- +source: crates/ruff_linter/src/rules/ruff/mod.rs +--- +RUF023.py:6:17: RUF023 [*] `Klass.__slots__` is not sorted + | +5 | class Klass: +6 | __slots__ = ["d", "c", "b", "a"] # a comment that is untouched + | ^^^^^^^^^^^^^^^^^^^^ RUF023 +7 | __match_args__ = ("d", "c", "b", "a") + | + = help: Apply a natural sort to `Klass.__slots__` + +ℹ Safe fix +3 3 | ######################### +4 4 | +5 5 | class Klass: +6 |- __slots__ = ["d", "c", "b", "a"] # a comment that is untouched + 6 |+ __slots__ = ["a", "b", "c", "d"] # a comment that is untouched +7 7 | __match_args__ = ("d", "c", "b", "a") +8 8 | +9 9 | # Quoting style is retained, + +RUF023.py:7:22: RUF023 [*] `Klass.__match_args__` is not sorted + | +5 | class Klass: +6 | __slots__ = ["d", "c", "b", "a"] # a comment that is untouched +7 | __match_args__ = ("d", "c", "b", "a") + | ^^^^^^^^^^^^^^^^^^^^ RUF023 +8 | +9 | # Quoting style is retained, + | + = help: Apply a natural sort to `Klass.__match_args__` + +ℹ Safe fix +4 4 | +5 5 | class Klass: +6 6 | __slots__ = ["d", "c", "b", "a"] # a comment that is untouched +7 |- __match_args__ = ("d", "c", "b", "a") + 7 |+ __match_args__ = ("a", "b", "c", "d") +8 8 | +9 9 | # Quoting style is retained, +10 10 | # but unnecessary parens are not + +RUF023.py:11:22: RUF023 [*] `Klass.__slots__` is not sorted + | + 9 | # Quoting style is retained, +10 | # but unnecessary parens are not +11 | __slots__: set = {'b', "c", ((('a')))} + | ^^^^^^^^^^^^^^^^^^^^^ RUF023 +12 | # Trailing commas are also not retained for single-line definitions +13 | # (but they are in multiline definitions) + | + = help: Apply a natural sort to `Klass.__slots__` + +ℹ Safe fix +8 8 | +9 9 | # Quoting style is retained, +10 10 | # but unnecessary parens are not +11 |- __slots__: set = {'b', "c", ((('a')))} + 11 |+ __slots__: set = {'a', 'b', "c"} +12 12 | # Trailing commas are also not retained for single-line definitions +13 13 | # (but they are in multiline definitions) +14 14 | __match_args__: tuple = ("b", "c", "a",) + +RUF023.py:14:29: RUF023 [*] `Klass.__match_args__` is not sorted + | +12 | # Trailing commas are also not retained for single-line definitions +13 | # (but they are in multiline definitions) +14 | __match_args__: tuple = ("b", "c", "a",) + | ^^^^^^^^^^^^^^^^ RUF023 +15 | +16 | class Klass2: + | + = help: Apply a natural sort to `Klass.__match_args__` + +ℹ Safe fix +11 11 | __slots__: set = {'b', "c", ((('a')))} +12 12 | # Trailing commas are also not retained for single-line definitions +13 13 | # (but they are in multiline definitions) +14 |- __match_args__: tuple = ("b", "c", "a",) + 14 |+ __match_args__: tuple = ("a", "b", "c") +15 15 | +16 16 | class Klass2: +17 17 | if bool(): + +RUF023.py:18:21: RUF023 [*] `Klass2.__slots__` is not sorted + | +16 | class Klass2: +17 | if bool(): +18 | __slots__ = {"x": "docs for x", "m": "docs for m", 
"a": "docs for a"} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +19 | else: +20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + | + = help: Apply a natural sort to `Klass2.__slots__` + +ℹ Safe fix +15 15 | +16 16 | class Klass2: +17 17 | if bool(): +18 |- __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} + 18 |+ __slots__ = {"a": "docs for a", "m": "docs for m", "x": "docs for x"} +19 19 | else: +20 20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 21 | + +RUF023.py:20:21: RUF023 [*] `Klass2.__slots__` is not sorted + | +18 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} +19 | else: +20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + | ^^^^^^^^^^^^^^^^^^^^^^ RUF023 +21 | +22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] + | + = help: Apply a natural sort to `Klass2.__slots__` + +ℹ Safe fix +17 17 | if bool(): +18 18 | __slots__ = {"x": "docs for x", "m": "docs for m", "a": "docs for a"} +19 19 | else: +20 |- __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) + 20 |+ __slots__ = "foo1", "foo2", "foo3" # NB: an implicit tuple (without parens) +21 21 | +22 22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +23 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + +RUF023.py:22:33: RUF023 [*] `Klass2.__match_args__` is not sorted + | +20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 | +22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") +24 | # we use natural sort, + | + = help: Apply a natural sort to `Klass2.__match_args__` + +ℹ Safe fix +19 19 | else: +20 20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 21 | +22 |- __match_args__: list[str] = ["the", "three", "little", "pigs"] + 22 |+ __match_args__: list[str] = ["little", "pigs", "the", "three"] +23 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") +24 24 | # we use natural sort, +25 25 | # not alphabetical sort or "isort-style" sort + +RUF023.py:23:17: RUF023 [*] `Klass2.__slots__` is not sorted + | +22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +24 | # we use natural sort, +25 | # not alphabetical sort or "isort-style" sort + | + = help: Apply a natural sort to `Klass2.__slots__` + +ℹ Safe fix +20 20 | __slots__ = "foo3", "foo2", "foo1" # NB: an implicit tuple (without parens) +21 21 | +22 22 | __match_args__: list[str] = ["the", "three", "little", "pigs"] +23 |- __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") + 23 |+ __slots__ = "an_unparenthesized_tuple", "in", "parenthesized_item" +24 24 | # we use natural sort, +25 25 | # not alphabetical sort or "isort-style" sort +26 26 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + +RUF023.py:26:17: RUF023 [*] `Klass2.__slots__` is not sorted + | +24 | # we use natural sort, +25 | # not alphabetical sort or "isort-style" sort +26 | __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +27 | +28 | 
############################ + | + = help: Apply a natural sort to `Klass2.__slots__` + +ℹ Safe fix +23 23 | __slots__ = ("parenthesized_item"), "in", ("an_unparenthesized_tuple") +24 24 | # we use natural sort, +25 25 | # not alphabetical sort or "isort-style" sort +26 |- __slots__ = {"aadvark237", "aadvark10092", "aadvark174", "aadvark532"} + 26 |+ __slots__ = {"aadvark174", "aadvark237", "aadvark532", "aadvark10092"} +27 27 | +28 28 | ############################ +29 29 | # Neat multiline definitions + +RUF023.py:33:17: RUF023 [*] `Klass3.__slots__` is not sorted + | +32 | class Klass3: +33 | __slots__ = ( + | _________________^ +34 | | "d0", +35 | | "c0", # a comment regarding 'c0' +36 | | "b0", +37 | | # a comment regarding 'a0': +38 | | "a0" +39 | | ) + | |_____^ RUF023 +40 | __match_args__ = [ +41 | "d", + | + = help: Apply a natural sort to `Klass3.__slots__` + +ℹ Safe fix +31 31 | +32 32 | class Klass3: +33 33 | __slots__ = ( +34 |- "d0", + 34 |+ # a comment regarding 'a0': + 35 |+ "a0", + 36 |+ "b0", +35 37 | "c0", # a comment regarding 'c0' +36 |- "b0", +37 |- # a comment regarding 'a0': +38 |- "a0" + 38 |+ "d0" +39 39 | ) +40 40 | __match_args__ = [ +41 41 | "d", + +RUF023.py:40:22: RUF023 [*] `Klass3.__match_args__` is not sorted + | +38 | "a0" +39 | ) +40 | __match_args__ = [ + | ______________________^ +41 | | "d", +42 | | "c", # a comment regarding 'c' +43 | | "b", +44 | | # a comment regarding 'a': +45 | | "a" +46 | | ] + | |_____^ RUF023 +47 | +48 | ########################################## + | + = help: Apply a natural sort to `Klass3.__match_args__` + +ℹ Safe fix +38 38 | "a0" +39 39 | ) +40 40 | __match_args__ = [ +41 |- "d", + 41 |+ # a comment regarding 'a': + 42 |+ "a", + 43 |+ "b", +42 44 | "c", # a comment regarding 'c' +43 |- "b", +44 |- # a comment regarding 'a': +45 |- "a" + 45 |+ "d" +46 46 | ] +47 47 | +48 48 | ########################################## + +RUF023.py:54:17: RUF023 [*] `Klass4.__slots__` is not sorted + | +52 | class Klass4: +53 | # comment0 +54 | __slots__ = ("d", "a", # comment1 + | _________________^ +55 | | # comment2 +56 | | "f", "b", +57 | | "strangely", # comment3 +58 | | # comment4 +59 | | "formatted", +60 | | # comment5 +61 | | ) # comment6 + | |_____^ RUF023 +62 | # comment7 + | + = help: Apply a natural sort to `Klass4.__slots__` + +ℹ Safe fix +51 51 | +52 52 | class Klass4: +53 53 | # comment0 +54 |- __slots__ = ("d", "a", # comment1 +55 |- # comment2 +56 |- "f", "b", +57 |- "strangely", # comment3 +58 |- # comment4 + 54 |+ __slots__ = ( + 55 |+ "a", + 56 |+ "b", + 57 |+ "d", # comment1 + 58 |+ # comment2 + 59 |+ "f", + 60 |+ # comment4 +59 61 | "formatted", + 62 |+ "strangely", # comment3 +60 63 | # comment5 +61 64 | ) # comment6 +62 65 | # comment7 + +RUF023.py:64:22: RUF023 [*] `Klass4.__match_args__` is not sorted + | +62 | # comment7 +63 | +64 | __match_args__ = [ # comment0 + | ______________________^ +65 | | # comment1 +66 | | # comment2 +67 | | "dx", "cx", "bx", "ax" # comment3 +68 | | # comment4 +69 | | # comment5 +70 | | # comment6 +71 | | ] # comment7 + | |_____^ RUF023 +72 | +73 | # from cpython/Lib/pathlib/__init__.py + | + = help: Apply a natural sort to `Klass4.__match_args__` + +ℹ Safe fix +62 62 | # comment7 +63 63 | +64 64 | __match_args__ = [ # comment0 + 65 |+ "ax", + 66 |+ "bx", + 67 |+ "cx", +65 68 | # comment1 +66 69 | # comment2 +67 |- "dx", "cx", "bx", "ax" # comment3 + 70 |+ "dx" # comment3 +68 71 | # comment4 +69 72 | # comment5 +70 73 | # comment6 + +RUF023.py:75:17: RUF023 [*] `PurePath.__slots__` is 
not sorted + | + 73 | # from cpython/Lib/pathlib/__init__.py + 74 | class PurePath: + 75 | __slots__ = ( + | _________________^ + 76 | | # The `_raw_paths` slot stores unnormalized string paths. This is set + 77 | | # in the `__init__()` method. + 78 | | '_raw_paths', + 79 | | + 80 | | # The `_drv`, `_root` and `_tail_cached` slots store parsed and + 81 | | # normalized parts of the path. They are set when any of the `drive`, + 82 | | # `root` or `_tail` properties are accessed for the first time. The + 83 | | # three-part division corresponds to the result of + 84 | | # `os.path.splitroot()`, except that the tail is further split on path + 85 | | # separators (i.e. it is a list of strings), and that the root and + 86 | | # tail are normalized. + 87 | | '_drv', '_root', '_tail_cached', + 88 | | + 89 | | # The `_str` slot stores the string representation of the path, + 90 | | # computed from the drive, root and tail when `__str__()` is called + 91 | | # for the first time. It's used to implement `_str_normcase` + 92 | | '_str', + 93 | | + 94 | | # The `_str_normcase_cached` slot stores the string path with + 95 | | # normalized case. It is set when the `_str_normcase` property is + 96 | | # accessed for the first time. It's used to implement `__eq__()` + 97 | | # `__hash__()`, and `_parts_normcase` + 98 | | '_str_normcase_cached', + 99 | | +100 | | # The `_parts_normcase_cached` slot stores the case-normalized +101 | | # string path after splitting on path separators. It's set when the +102 | | # `_parts_normcase` property is accessed for the first time. It's used +103 | | # to implement comparison methods like `__lt__()`. +104 | | '_parts_normcase_cached', +105 | | +106 | | # The `_hash` slot stores the hash of the case-normalized string +107 | | # path. It's set when `__hash__()` is called for the first time. +108 | | '_hash', +109 | | ) + | |_____^ RUF023 +110 | +111 | # From cpython/Lib/pickletools.py + | + = help: Apply a natural sort to `PurePath.__slots__` + +ℹ Safe fix +73 73 | # from cpython/Lib/pathlib/__init__.py +74 74 | class PurePath: +75 75 | __slots__ = ( +76 |- # The `_raw_paths` slot stores unnormalized string paths. This is set +77 |- # in the `__init__()` method. +78 |- '_raw_paths', +79 |- +80 76 | # The `_drv`, `_root` and `_tail_cached` slots store parsed and +81 77 | # normalized parts of the path. They are set when any of the `drive`, +82 78 | # `root` or `_tail` properties are accessed for the first time. The +-------------------------------------------------------------------------------- +84 80 | # `os.path.splitroot()`, except that the tail is further split on path +85 81 | # separators (i.e. it is a list of strings), and that the root and +86 82 | # tail are normalized. +87 |- '_drv', '_root', '_tail_cached', +88 |- + 83 |+ '_drv', + 84 |+ # The `_hash` slot stores the hash of the case-normalized string + 85 |+ # path. It's set when `__hash__()` is called for the first time. + 86 |+ '_hash', + 87 |+ # The `_parts_normcase_cached` slot stores the case-normalized + 88 |+ # string path after splitting on path separators. It's set when the + 89 |+ # `_parts_normcase` property is accessed for the first time. It's used + 90 |+ # to implement comparison methods like `__lt__()`. + 91 |+ '_parts_normcase_cached', + 92 |+ # The `_raw_paths` slot stores unnormalized string paths. This is set + 93 |+ # in the `__init__()` method. 
+ 94 |+ '_raw_paths', + 95 |+ '_root', +89 96 | # The `_str` slot stores the string representation of the path, +90 97 | # computed from the drive, root and tail when `__str__()` is called +91 98 | # for the first time. It's used to implement `_str_normcase` +92 99 | '_str', +93 |- +94 100 | # The `_str_normcase_cached` slot stores the string path with +95 101 | # normalized case. It is set when the `_str_normcase` property is +96 102 | # accessed for the first time. It's used to implement `__eq__()` +97 103 | # `__hash__()`, and `_parts_normcase` +98 104 | '_str_normcase_cached', +99 |- +100 |- # The `_parts_normcase_cached` slot stores the case-normalized +101 |- # string path after splitting on path separators. It's set when the +102 |- # `_parts_normcase` property is accessed for the first time. It's used +103 |- # to implement comparison methods like `__lt__()`. +104 |- '_parts_normcase_cached', +105 |- +106 |- # The `_hash` slot stores the hash of the case-normalized string +107 |- # path. It's set when `__hash__()` is called for the first time. +108 |- '_hash', + 105 |+ '_tail_cached', +109 106 | ) +110 107 | +111 108 | # From cpython/Lib/pickletools.py + +RUF023.py:113:17: RUF023 [*] `ArgumentDescriptor.__slots__` is not sorted + | +111 | # From cpython/Lib/pickletools.py +112 | class ArgumentDescriptor(object): +113 | __slots__ = ( + | _________________^ +114 | | # name of descriptor record, also a module global name; a string +115 | | 'name', +116 | | +117 | | # length of argument, in bytes; an int; UP_TO_NEWLINE and +118 | | # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length +119 | | # cases +120 | | 'n', +121 | | +122 | | # a function taking a file-like object, reading this kind of argument +123 | | # from the object at the current position, advancing the current +124 | | # position by n bytes, and returning the value of the argument +125 | | 'reader', +126 | | +127 | | # human-readable docs for this arg descriptor; a string +128 | | 'doc', +129 | | ) + | |_____^ RUF023 +130 | +131 | #################################### + | + = help: Apply a natural sort to `ArgumentDescriptor.__slots__` + +ℹ Safe fix +111 111 | # From cpython/Lib/pickletools.py +112 112 | class ArgumentDescriptor(object): +113 113 | __slots__ = ( +114 |- # name of descriptor record, also a module global name; a string +115 |- 'name', +116 |- + 114 |+ # human-readable docs for this arg descriptor; a string + 115 |+ 'doc', +117 116 | # length of argument, in bytes; an int; UP_TO_NEWLINE and +118 117 | # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length +119 118 | # cases +120 119 | 'n', +121 |- + 120 |+ # name of descriptor record, also a module global name; a string + 121 |+ 'name', +122 122 | # a function taking a file-like object, reading this kind of argument +123 123 | # from the object at the current position, advancing the current +124 124 | # position by n bytes, and returning the value of the argument +125 125 | 'reader', +126 |- +127 |- # human-readable docs for this arg descriptor; a string +128 |- 'doc', +129 126 | ) +130 127 | +131 128 | #################################### + +RUF023.py:138:17: RUF023 `SlotUser.__slots__` is not sorted + | +136 | # Multiline dicts are out of scope. 
+137 | class SlotUser: +138 | __slots__ = {'power': 'measured in kilowatts', + | _________________^ +139 | | 'distance': 'measured in kilometers'} + | |______________________________________________________^ RUF023 +140 | +141 | class Klass5: + | + = help: Apply a natural sort to `SlotUser.__slots__` + +RUF023.py:142:22: RUF023 `Klass5.__match_args__` is not sorted + | +141 | class Klass5: +142 | __match_args__ = ( + | ______________________^ +143 | | "look", +144 | | ( +145 | | "a_veeeeeeeeeeeeeeeeeeery_long_parenthesized_item" +146 | | ), +147 | | ) + | |_____^ RUF023 +148 | __slots__ = ( +149 | "b", + | + = help: Apply a natural sort to `Klass5.__match_args__` + +RUF023.py:148:17: RUF023 `Klass5.__slots__` is not sorted + | +146 | ), +147 | ) +148 | __slots__ = ( + | _________________^ +149 | | "b", +150 | | (( +151 | | "c" +152 | | )), +153 | | "a" +154 | | ) + | |_____^ RUF023 +155 | __slots__ = ("don't" "care" "about", "__slots__" "with", "concatenated" "strings") + | + = help: Apply a natural sort to `Klass5.__slots__` + +RUF023.py:155:17: RUF023 `Klass5.__slots__` is not sorted + | +153 | "a" +154 | ) +155 | __slots__ = ("don't" "care" "about", "__slots__" "with", "concatenated" "strings") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF023 +156 | +157 | ################################### + | + = help: Apply a natural sort to `Klass5.__slots__` + + diff --git a/ruff.schema.json b/ruff.schema.json index edafa4acf6ef3..023cfd6037d5f 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3449,6 +3449,7 @@ "RUF020", "RUF021", "RUF022", + "RUF023", "RUF024", "RUF1", "RUF10",