Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] Mark fixes for unsorted-dunder-all and unsorted-dunder-slots as unsafe when there are complex comments in the sequence (RUF022, RUF023) #14560

Merged
merged 1 commit into from
Nov 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
use std::borrow::Cow;
use std::cmp::Ordering;

use itertools::Itertools;

use ruff_python_ast as ast;
use ruff_python_codegen::Stylist;
use ruff_python_parser::{TokenKind, Tokens};
Expand Down Expand Up @@ -314,6 +316,52 @@ impl<'a> SortClassification<'a> {
}
}

/// The complexity of the comments in a multiline sequence.
///
/// A sequence like this has "simple" comments: it's unambiguous
/// which item each comment refers to, so there's no "risk" in sorting it:
///
/// ```py
/// __all__ = [
/// "foo", # comment1
/// "bar", # comment2
/// ]
/// ```
///
/// This sequence has complex comments: we can't safely autofix the sort here,
/// as the own-line comments might be used to create sections in `__all__`:
///
/// ```py
/// __all__ = [
/// # fooey things
/// "foo1",
/// "foo2",
/// # barey things
/// "bar1",
/// "foobar",
/// ]
/// ```
///
/// This sequence also has complex comments -- it's ambiguous which item
/// each comment should belong to:
///
/// ```py
/// __all__ = [
/// "foo1", "foo", "barfoo", # fooey things
/// "baz", bazz2", "fbaz", # barrey things
/// ]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(super) enum CommentComplexity {
Simple,
Complex,
}

impl CommentComplexity {
pub(super) const fn is_complex(self) -> bool {
matches!(self, CommentComplexity::Complex)
}
}

// An instance of this struct encapsulates an analysis
/// of a multiline Python tuple/list that represents an
/// `__all__`/`__slots__`/etc. definition or augmentation.
Expand All @@ -328,6 +376,24 @@ impl<'a> MultilineStringSequenceValue<'a> {
self.items.len()
}

/// Determine the [`CommentComplexity`] of this multiline string sequence.
pub(super) fn comment_complexity(&self) -> CommentComplexity {
if self.items.iter().tuple_windows().any(|(first, second)| {
first.has_own_line_comments()
|| first
.end_of_line_comments
.is_some_and(|end_line_comment| second.start() < end_line_comment.end())
}) || self
.items
.last()
.is_some_and(StringSequenceItem::has_own_line_comments)
{
CommentComplexity::Complex
} else {
CommentComplexity::Simple
}
}

/// Analyse the source range for a multiline Python tuple/list that
/// represents an `__all__`/`__slots__`/etc. definition or augmentation.
/// Return `None` if the analysis fails for whatever reason.
Expand Down Expand Up @@ -792,6 +858,10 @@ impl<'a> StringSequenceItem<'a> {
fn with_no_comments(value: &'a str, element_range: TextRange) -> Self {
Self::new(value, vec![], element_range, None)
}

fn has_own_line_comments(&self) -> bool {
!self.preceding_comment_ranges.is_empty()
}
}

impl Ranged for StringSequenceItem<'_> {
Expand Down
104 changes: 65 additions & 39 deletions crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_source_file::LineRanges;
Expand Down Expand Up @@ -54,19 +54,40 @@ use crate::rules::ruff::rules::sequence_sorting::{
/// ```
///
/// ## Fix safety
/// This rule's fix is marked as always being safe, in that
/// This rule's fix is marked as unsafe if there are any comments that take up
/// a whole line by themselves inside the `__all__` definition, for example:
/// ```py
/// __all__ = [
/// # eggy things
/// "duck_eggs",
/// "chicken_eggs",
/// # hammy things
/// "country_ham",
/// "parma_ham",
/// ]
/// ```
///
/// This is a common pattern used to delimit categories within a module's API,
/// but it would be out of the scope of this rule to attempt to maintain these
/// categories when alphabetically sorting the items of `__all__`.
///
/// The fix is also marked as unsafe if there are more than two `__all__` items
/// on a single line and that line also has a trailing comment, since here it
/// is impossible to accurately gauge which item the comment should be moved
/// with when sorting `__all__`:
/// ```py
/// __all__ = [
/// "a", "c", "e", # a comment
/// "b", "d", "f", # a second comment
/// ]
/// ```
///
/// Other than this, the rule's fix is marked as always being safe, in that
/// it should very rarely alter the semantics of any Python code.
/// However, note that (although it's rare) the value of `__all__`
/// could be read by code elsewhere that depends on the exact
/// iteration order of the items in `__all__`, in which case this
/// rule's fix could theoretically cause breakage.
///
/// Note also that for multiline `__all__` definitions
/// that include comments on their own line, it can be hard
/// to tell where the comments should be moved to when sorting
/// the contents of `__all__`. While this rule's fix will
/// never delete a comment, it might *sometimes* move a
/// comment to an unexpected location.
#[violation]
pub struct UnsortedDunderAll;

Expand Down Expand Up @@ -208,37 +229,42 @@ fn create_fix(
let locator = checker.locator();
let is_multiline = locator.contains_line_break(range);

let sorted_source_code = {
// The machinery in the `MultilineDunderAllValue` is actually
// sophisticated enough that it would work just as well for
// single-line `__all__` definitions, and we could reduce
// the number of lines of code in this file by doing that.
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
// must process every token in an `__all__` definition as
// part of its analysis, and this is quite slow. For
// single-line `__all__` definitions, it's also unnecessary,
// as it's impossible to have comments in between the
// `__all__` elements if the `__all__` definition is all on
// a single line. Therefore, as an optimisation, we do the
// bare minimum of token-processing for single-line `__all__`
// definitions:
if is_multiline {
let value = MultilineStringSequenceValue::from_source_range(
range,
kind,
locator,
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist())
// The machinery in the `MultilineDunderAllValue` is actually
// sophisticated enough that it would work just as well for
// single-line `__all__` definitions, and we could reduce
// the number of lines of code in this file by doing that.
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
// must process every token in an `__all__` definition as
// part of its analysis, and this is quite slow. For
// single-line `__all__` definitions, it's also unnecessary,
// as it's impossible to have comments in between the
// `__all__` elements if the `__all__` definition is all on
// a single line. Therefore, as an optimisation, we do the
// bare minimum of token-processing for single-line `__all__`
// definitions:
let (sorted_source_code, applicability) = if is_multiline {
let value = MultilineStringSequenceValue::from_source_range(
range,
kind,
locator,
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());
let applicability = if value.comment_complexity().is_complex() {
Applicability::Unsafe
} else {
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE)
}
Applicability::Safe
};
let sorted_source =
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist());
(sorted_source, applicability)
} else {
let sorted_source =
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE);
(sorted_source, Applicability::Safe)
};

Some(Fix::safe_edit(Edit::range_replacement(
sorted_source_code,
range,
)))
let edit = Edit::range_replacement(sorted_source_code, range);
Some(Fix::applicable_edit(edit, applicability))
}
92 changes: 65 additions & 27 deletions crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;
use crate::rules::ruff::rules::sequence_sorting::{
sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind,
SortClassification, SortingStyle,
sort_single_line_elements_sequence, CommentComplexity, MultilineStringSequenceValue,
SequenceKind, SortClassification, SortingStyle,
};
use crate::Locator;

Expand All @@ -37,24 +37,51 @@ use crate::Locator;
/// ```
///
/// ## Fix safety
/// This rule's fix is marked as unsafe whenever Ruff can detect that code
/// elsewhere in the same file reads the `__slots__` variable in some way.
/// This is because the order of the items in `__slots__` may have semantic
/// significance if the `__slots__` of a class is being iterated over, or
/// being assigned to another value.
/// This rule's fix is marked as unsafe in three situations.
///
/// Firstly, the fix is unsafe if there are any comments that take up
/// a whole line by themselves inside the `__slots__` definition, for example:
/// ```py
/// class Foo:
/// __slots__ = [
/// # eggy things
/// "duck_eggs",
/// "chicken_eggs",
/// # hammy things
/// "country_ham",
/// "parma_ham",
/// ]
/// ```
///
/// This is a common pattern used to delimit categories within a class's slots,
/// but it would be out of the scope of this rule to attempt to maintain these
/// categories when applying a natural sort to the items of `__slots__`.
///
/// Secondly, the fix is also marked as unsafe if there are more than two
/// `__slots__` items on a single line and that line also has a trailing
/// comment, since here it is impossible to accurately gauge which item the
/// comment should be moved with when sorting `__slots__`:
/// ```py
/// class Bar:
/// __slots__ = [
/// "a", "c", "e", # a comment
/// "b", "d", "f", # a second comment
/// ]
/// ```
///
/// Lastly, this rule's fix is marked as unsafe whenever Ruff can detect that
/// code elsewhere in the same file reads the `__slots__` variable in some way
/// and the `__slots__` variable is not assigned to a set. This is because the
/// order of the items in `__slots__` may have semantic significance if the
/// `__slots__` of a class is being iterated over, or being assigned to another
/// value.
///
/// In the vast majority of other cases, this rule's fix is unlikely to
/// cause breakage; as such, Ruff will otherwise mark this rule's fix as
/// safe. However, note that (although it's rare) the value of `__slots__`
/// could still be read by code outside of the module in which the
/// `__slots__` definition occurs, in which case this rule's fix could
/// theoretically cause breakage.
///
/// Additionally, note that for multiline `__slots__` definitions that
/// include comments on their own line, it can be hard to tell where the
/// comments should be moved to when sorting the contents of `__slots__`.
/// While this rule's fix will never delete a comment, it might *sometimes*
/// move a comment to an unexpected location.
#[violation]
pub struct UnsortedDunderSlots {
class_name: ast::name::Name,
Expand Down Expand Up @@ -122,15 +149,17 @@ pub(crate) fn sort_dunder_slots(checker: &Checker, binding: &Binding) -> Option<
);

if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification {
if let Some(sorted_source_code) = display.generate_sorted_source_code(&items, checker) {
if let Some((sorted_source_code, comment_complexity)) =
display.generate_sorted_source_code(&items, checker)
{
let edit = Edit::range_replacement(sorted_source_code, display.range());

let applicability = if display.kind.is_set_literal() || binding.is_unused() {
Applicability::Safe
} else {
let applicability = if comment_complexity.is_complex()
|| (binding.is_used() && !display.kind.is_set_literal())
{
Applicability::Unsafe
} else {
Applicability::Safe
};

diagnostic.set_fix(Fix::applicable_edit(edit, applicability));
}
}
Expand Down Expand Up @@ -219,7 +248,11 @@ impl<'a> StringLiteralDisplay<'a> {
Some(result)
}

fn generate_sorted_source_code(&self, elements: &[&str], checker: &Checker) -> Option<String> {
fn generate_sorted_source_code(
&self,
elements: &[&str],
checker: &Checker,
) -> Option<(String, CommentComplexity)> {
let locator = checker.locator();

let multiline_classification = if locator.contains_line_break(self.range()) {
Expand All @@ -238,26 +271,31 @@ impl<'a> StringLiteralDisplay<'a> {
elements,
)?;
assert_eq!(analyzed_sequence.len(), self.elts.len());
Some(analyzed_sequence.into_sorted_source_code(
let comment_complexity = analyzed_sequence.comment_complexity();
let sorted_code = analyzed_sequence.into_sorted_source_code(
SORTING_STYLE,
locator,
checker.stylist(),
))
);
Some((sorted_code, comment_complexity))
}
// Sorting multiline dicts is unsupported
(DisplayKind::Dict { .. }, MultilineClassification::Multiline) => None,
(DisplayKind::Sequence(sequence_kind), MultilineClassification::SingleLine) => {
Some(sort_single_line_elements_sequence(
let sorted_code = sort_single_line_elements_sequence(
*sequence_kind,
&self.elts,
elements,
locator,
SORTING_STYLE,
))
);
Some((sorted_code, CommentComplexity::Simple))
}
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => {
let sorted_code =
sort_single_line_elements_dict(&self.elts, elements, items, locator);
Some((sorted_code, CommentComplexity::Simple))
}
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => Some(
sort_single_line_elements_dict(&self.elts, elements, items, locator),
),
}
}
}
Expand Down
Loading
Loading