Implement whitespace-before-comment (E261, E262, E265, E266) (#2654)

astral-sh · Feb 8, 2023 · 824c0d2 · 824c0d2
1 parent f5efdd0
commit 824c0d2
Show file tree

Hide file tree

Showing 11 changed files with 394 additions and 15 deletions.
diff --git a/crates/ruff/resources/test/fixtures/pycodestyle/E26.py b/crates/ruff/resources/test/fixtures/pycodestyle/E26.py
@@ -0,0 +1,66 @@
+#: E261:1:5
+pass # an inline comment
+#: E262:1:12
+x = x + 1  #Increment x
+#: E262:1:12
+x = x + 1  #  Increment x
+#: E262:1:12
+x = y + 1  #:  Increment x
+#: E265:1:1
+#Block comment
+a = 1
+#: E265:2:1
+m = 42
+#! This is important
+mx = 42 - 42
+#: E266:3:5 E266:6:5
+def how_it_feel(r):
+
+    ### This is a variable ###
+    a = 42
+
+    ### Of course it is unused
+    return
+#: E265:1:1 E266:2:1
+##if DEBUG:
+##    logging.error()
+#: W291:1:42
+#########################################
+#:
+
+#: Okay
+#!/usr/bin/env python
+
+pass  # an inline comment
+x = x + 1   # Increment x
+y = y + 1   #: Increment x
+
+# Block comment
+a = 1
+
+# Block comment1
+
+# Block comment2
+aaa = 1
+
+
+# example of docstring (not parsed)
+def oof():
+    """
+    #foo not parsed
+    """
+
+    ####################################################################
+    #                            A SEPARATOR                           #
+    ####################################################################
+
+    # ################################################################ #
+    # ####################### another separator ###################### #
+    # ################################################################ #
+#: E262:3:9
+# -*- coding: utf8 -*-
+#  (One space one NBSP) Ok for block comment
+a = 42  #  (One space one NBSP)
+#: E262:2:9
+#  (Two spaces) Ok for block comment
+a = 42  #  (Two spaces)
diff --git a/crates/ruff/src/checkers/logical_lines.rs b/crates/ruff/src/checkers/logical_lines.rs
@@ -8,6 +8,7 @@ use crate::registry::Diagnostic;
 use crate::rules::pycodestyle::logical_lines::{iter_logical_lines, TokenFlags};
 use crate::rules::pycodestyle::rules::{
     extraneous_whitespace, indentation, space_around_operator, whitespace_around_keywords,
+    whitespace_before_comment,
 };
 use crate::settings::Settings;
 use crate::source_code::{Locator, Stylist};
@@ -107,6 +108,19 @@ pub fn check_logical_lines(
                 }
             }
         }
+        if line.flags.contains(TokenFlags::COMMENT) {
+            for (range, kind) in whitespace_before_comment(&line.tokens, locator) {
+                if settings.rules.enabled(kind.rule()) {
+                    diagnostics.push(Diagnostic {
+                        kind,
+                        location: range.location,
+                        end_location: range.end_location,
+                        fix: None,
+                        parent: None,
+                    });
+                }
+            }
+        }
 
         for (index, kind) in indentation(
             &line,

diff --git a/crates/ruff/src/registry.rs b/crates/ruff/src/registry.rs
@@ -42,6 +42,14 @@ ruff_macros::define_rule_mapping!(
     #[cfg(feature = "logical_lines")]
     E224 => rules::pycodestyle::rules::TabAfterOperator,
     #[cfg(feature = "logical_lines")]
+    E261 => rules::pycodestyle::rules::TooFewSpacesBeforeInlineComment,
+    #[cfg(feature = "logical_lines")]
+    E262 => rules::pycodestyle::rules::NoSpaceAfterInlineComment,
+    #[cfg(feature = "logical_lines")]
+    E265 => rules::pycodestyle::rules::NoSpaceAfterBlockComment,
+    #[cfg(feature = "logical_lines")]
+    E266 => rules::pycodestyle::rules::MultipleLeadingHashesForBlockComment,
+    #[cfg(feature = "logical_lines")]
     E271 => rules::pycodestyle::rules::MultipleSpacesAfterKeyword,
     #[cfg(feature = "logical_lines")]
     E272 => rules::pycodestyle::rules::MultipleSpacesBeforeKeyword,
@@ -757,22 +765,26 @@ impl Rule {
             #[cfg(feature = "logical_lines")]
             Rule::IndentationWithInvalidMultiple
             | Rule::IndentationWithInvalidMultipleComment
+            | Rule::MultipleLeadingHashesForBlockComment
+            | Rule::MultipleSpacesAfterKeyword
             | Rule::MultipleSpacesAfterOperator
+            | Rule::MultipleSpacesBeforeKeyword
             | Rule::MultipleSpacesBeforeOperator
             | Rule::NoIndentedBlock
             | Rule::NoIndentedBlockComment
+            | Rule::NoSpaceAfterBlockComment
+            | Rule::NoSpaceAfterInlineComment
             | Rule::OverIndented
+            | Rule::TabAfterKeyword
             | Rule::TabAfterOperator
+            | Rule::TabBeforeKeyword
             | Rule::TabBeforeOperator
+            | Rule::TooFewSpacesBeforeInlineComment
             | Rule::UnexpectedIndentation
             | Rule::UnexpectedIndentationComment
             | Rule::WhitespaceAfterOpenBracket
             | Rule::WhitespaceBeforeCloseBracket
-            | Rule::WhitespaceBeforePunctuation
-            | Rule::MultipleSpacesAfterKeyword
-            | Rule::MultipleSpacesBeforeKeyword
-            | Rule::TabAfterKeyword
-            | Rule::TabBeforeKeyword => &LintSource::LogicalLines,
+            | Rule::WhitespaceBeforePunctuation => &LintSource::LogicalLines,
             _ => &LintSource::Ast,
         }
     }

diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs
@@ -16,29 +16,35 @@ bitflags! {
         const PUNCTUATION = 0b0000_0100;
         /// Whether the logical line contains a keyword.
         const KEYWORD = 0b0000_1000;
+        /// Whether the logical line contains a comment.
+        const COMMENT = 0b0001_0000;
     }
 }
 
 #[derive(Debug)]
-pub struct LogicalLine {
+pub struct LogicalLine<'a> {
     pub text: String,
     pub mapping: Vec<(usize, Location)>,
     pub flags: TokenFlags,
+    pub tokens: Vec<(Location, &'a Tok, Location)>,
 }
 
-impl LogicalLine {
+impl<'a> LogicalLine<'a> {
     pub fn is_comment(&self) -> bool {
         self.text.is_empty()
     }
 }
 
-fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> LogicalLine {
+fn build_line<'a>(
+    tokens: Vec<(Location, &'a Tok, Location)>,
+    locator: &Locator,
+) -> LogicalLine<'a> {
     let mut logical = String::with_capacity(88);
     let mut mapping = Vec::new();
     let mut flags = TokenFlags::empty();
     let mut prev: Option<&Location> = None;
     let mut length = 0;
-    for (start, tok, end) in tokens {
+    for (start, tok, end) in &tokens {
         if matches!(
             tok,
             Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent
@@ -51,6 +57,7 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
         }
 
         if matches!(tok, Tok::Comment { .. }) {
+            flags.insert(TokenFlags::COMMENT);
             continue;
         }
 
@@ -181,10 +188,11 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
         text: logical,
         mapping,
         flags,
+        tokens,
     }
 }
 
-pub fn iter_logical_lines(tokens: &[LexResult], locator: &Locator) -> Vec<LogicalLine> {
+pub fn iter_logical_lines<'a>(tokens: &'a [LexResult], locator: &Locator) -> Vec<LogicalLine<'a>> {
     let mut parens = 0;
     let mut accumulator = Vec::with_capacity(32);
     let mut lines = Vec::with_capacity(128);
@@ -200,19 +208,19 @@ pub fn iter_logical_lines(tokens: &[LexResult], locator: &Locator) -> Vec<Logica
                 Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
             ) {
                 if matches!(tok, Tok::Newline) {
-                    lines.push(build_line(&accumulator, locator));
-                    accumulator.drain(..);
+                    lines.push(build_line(accumulator, locator));
+                    accumulator = Vec::with_capacity(32);
                 } else if tokens.len() == 1 {
                     accumulator.remove(0);
                 } else {
-                    lines.push(build_line(&accumulator, locator));
-                    accumulator.drain(..);
+                    lines.push(build_line(accumulator, locator));
+                    accumulator = Vec::with_capacity(32);
                 }
             }
         }
     }
     if !accumulator.is_empty() {
-        lines.push(build_line(&accumulator, locator));
+        lines.push(build_line(accumulator, locator));
     }
     lines
 }
diff --git a/crates/ruff/src/rules/pycodestyle/mod.rs b/crates/ruff/src/rules/pycodestyle/mod.rs
@@ -53,17 +53,21 @@ mod tests {
     #[cfg(feature = "logical_lines")]
     #[test_case(Rule::IndentationWithInvalidMultiple, Path::new("E11.py"))]
     #[test_case(Rule::IndentationWithInvalidMultipleComment, Path::new("E11.py"))]
+    #[test_case(Rule::MultipleLeadingHashesForBlockComment, Path::new("E26.py"))]
     #[test_case(Rule::MultipleSpacesAfterKeyword, Path::new("E27.py"))]
     #[test_case(Rule::MultipleSpacesAfterOperator, Path::new("E22.py"))]
     #[test_case(Rule::MultipleSpacesBeforeKeyword, Path::new("E27.py"))]
     #[test_case(Rule::MultipleSpacesBeforeOperator, Path::new("E22.py"))]
     #[test_case(Rule::NoIndentedBlock, Path::new("E11.py"))]
     #[test_case(Rule::NoIndentedBlockComment, Path::new("E11.py"))]
+    #[test_case(Rule::NoSpaceAfterBlockComment, Path::new("E26.py"))]
+    #[test_case(Rule::NoSpaceAfterInlineComment, Path::new("E26.py"))]
     #[test_case(Rule::OverIndented, Path::new("E11.py"))]
     #[test_case(Rule::TabAfterKeyword, Path::new("E27.py"))]
     #[test_case(Rule::TabAfterOperator, Path::new("E22.py"))]
     #[test_case(Rule::TabBeforeKeyword, Path::new("E27.py"))]
     #[test_case(Rule::TabBeforeOperator, Path::new("E22.py"))]
+    #[test_case(Rule::TooFewSpacesBeforeInlineComment, Path::new("E26.py"))]
     #[test_case(Rule::UnexpectedIndentation, Path::new("E11.py"))]
     #[test_case(Rule::UnexpectedIndentationComment, Path::new("E11.py"))]
     #[test_case(Rule::WhitespaceAfterOpenBracket, Path::new("E20.py"))]

diff --git a/crates/ruff/src/rules/pycodestyle/rules/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/mod.rs
@@ -33,6 +33,10 @@ pub use whitespace_around_keywords::{
     whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword,
     TabAfterKeyword, TabBeforeKeyword,
 };
+pub use whitespace_before_comment::{
+    whitespace_before_comment, MultipleLeadingHashesForBlockComment, NoSpaceAfterBlockComment,
+    NoSpaceAfterInlineComment, TooFewSpacesBeforeInlineComment,
+};
 
 mod ambiguous_class_name;
 mod ambiguous_function_name;
@@ -53,3 +57,4 @@ mod not_tests;
 mod space_around_operator;
 mod type_comparison;
 mod whitespace_around_keywords;
+mod whitespace_before_comment;
diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs
@@ -0,0 +1,120 @@
+#![allow(dead_code)]
+
+use rustpython_parser::ast::Location;
+use rustpython_parser::lexer::Tok;
+
+use ruff_macros::{define_violation, derive_message_formats};
+
+use crate::ast::types::Range;
+use crate::registry::DiagnosticKind;
+use crate::source_code::Locator;
+use crate::violation::Violation;
+
+define_violation!(
+    pub struct TooFewSpacesBeforeInlineComment;
+);
+impl Violation for TooFewSpacesBeforeInlineComment {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Insert at least two spaces before an inline comment")
+    }
+}
+
+define_violation!(
+    pub struct NoSpaceAfterInlineComment;
+);
+impl Violation for NoSpaceAfterInlineComment {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Inline comment should start with `# `")
+    }
+}
+
+define_violation!(
+    pub struct NoSpaceAfterBlockComment;
+);
+impl Violation for NoSpaceAfterBlockComment {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Block comment should start with `# `")
+    }
+}
+
+define_violation!(
+    pub struct MultipleLeadingHashesForBlockComment;
+);
+impl Violation for MultipleLeadingHashesForBlockComment {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Too many leading `#` before block comment")
+    }
+}
+
+/// E261, E262, E265, E266
+#[cfg(feature = "logical_lines")]
+pub fn whitespace_before_comment(
+    tokens: &[(Location, &Tok, Location)],
+    locator: &Locator,
+) -> Vec<(Range, DiagnosticKind)> {
+    let mut diagnostics = vec![];
+    let mut prev_end = Location::new(0, 0);
+    for (start, tok, end) in tokens {
+        if let Tok::Comment(text) = tok {
+            let line = locator.slice_source_code_range(&Range::new(
+                Location::new(start.row(), 0),
+                Location::new(start.row(), start.column()),
+            ));
+
+            let is_inline_comment = !line.trim().is_empty();
+            if is_inline_comment {
+                if prev_end.row() == start.row() && start.column() < prev_end.column() + 2 {
+                    diagnostics.push((
+                        Range::new(prev_end, *start),
+                        TooFewSpacesBeforeInlineComment.into(),
+                    ));
+                }
+            }
+
+            // Split into the portion before and after the first space.
+            let mut parts = text.splitn(2, ' ');
+            let symbol = parts.next().unwrap_or("");
+            let comment = parts.next().unwrap_or("");
+
+            let bad_prefix = if symbol != "#" && symbol != "#:" {
+                Some(symbol.trim_start_matches('#').chars().next().unwrap_or('#'))
+            } else {
+                None
+            };
+
+            if is_inline_comment {
+                if bad_prefix.is_some() || comment.chars().next().map_or(false, char::is_whitespace)
+                {
+                    diagnostics.push((Range::new(*start, *end), NoSpaceAfterInlineComment.into()));
+                }
+            } else if let Some(bad_prefix) = bad_prefix {
+                if bad_prefix != '!' || start.row() > 1 {
+                    if bad_prefix != '#' {
+                        diagnostics
+                            .push((Range::new(*start, *end), NoSpaceAfterBlockComment.into()));
+                    } else if !comment.is_empty() {
+                        diagnostics.push((
+                            Range::new(*start, *end),
+                            MultipleLeadingHashesForBlockComment.into(),
+                        ));
+                    }
+                }
+            }
+        } else if !matches!(tok, Tok::NonLogicalNewline) {
+            prev_end = *end;
+        }
+    }
+    diagnostics
+}
+
+#[cfg(not(feature = "logical_lines"))]
+pub fn whitespace_before_comment(
+    _tokens: &[(Location, &Tok, Location)],
+    _locator: &Locator,
+) -> Vec<(Range, DiagnosticKind)> {
+    vec![]
+}