From 8365d2e0fd465edd38fbc378569c1a22dbac4379 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 23 Nov 2023 07:40:57 -0600 Subject: [PATCH] Avoid `E703` for last expression in a cell (#8821) ## Summary This PR updates the `E703` rule to avoid flagging any semicolons if they're present after the last expression in a notebook cell. These are intended to hide the cell output. Part of #8669 ## Test Plan Add test notebook and update the snapshots. --- .../test/fixtures/pycodestyle/E703.ipynb | 94 +++++++++++++++++++ crates/ruff_linter/src/checkers/tokens.rs | 16 +++- crates/ruff_linter/src/linter.rs | 3 +- .../ruff_linter/src/rules/pycodestyle/mod.rs | 1 + .../pycodestyle/rules/compound_statements.rs | 67 ++++++++++--- ...__pycodestyle__tests__E703_E703.ipynb.snap | 46 +++++++++ 6 files changed, 212 insertions(+), 15 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb create mode 100644 crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb new file mode 100644 index 0000000000000..97606b62f3792 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E703.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "33faf7ad-a3fd-4ac4-a0c3-52e507ed49df", + "metadata": {}, + "outputs": [], + "source": [ + "x = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "481fb4bf-c1b9-47da-927f-3cfdfe4b49ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Simple case\n", + "x;" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2f0c65a5-0a0e-4080-afce-5a8ed0d706df", + "metadata": {}, + "outputs": [], + "source": [ + "# Only skip the last expression\n", + "x; # E703\n", + "x;" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a3fd75d-26d9-44f7-b013-1684aabfd0ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Nested expressions isn't relevant\n", + "if True:\n", + " x;" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "05eab5b9-e2ba-4954-8ef3-b035a79573fe", + "metadata": {}, + "outputs": [], + "source": [ + "# Semicolons with multiple expressions\n", + "x; x;" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9cbbddc5-83fc-4fdb-81ab-53a3912ae898", + "metadata": {}, + "outputs": [], + "source": [ + "# Comments, newlines and whitespace\n", + "x; # comment\n", + "\n", + "# another comment" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (ruff-playground)", + "language": "python", + "name": "ruff-playground" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index b0f563c23b91f..e9d23ed91c626 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -2,6 +2,8 @@ use std::path::Path; +use ruff_notebook::CellOffsets; +use ruff_python_ast::PySourceType; use ruff_python_parser::lexer::LexResult; use ruff_python_parser::Tok; @@ -25,7 +27,8 @@ pub(crate) fn check_tokens( locator: &Locator, indexer: &Indexer, settings: &LinterSettings, - is_stub: bool, + source_type: PySourceType, + cell_offsets: Option<&CellOffsets>, ) -> Vec { let mut diagnostics: Vec = vec![]; @@ -112,7 +115,14 @@ pub(crate) fn check_tokens( Rule::MultipleStatementsOnOneLineSemicolon, Rule::UselessSemicolon, ]) { - pycodestyle::rules::compound_statements(&mut diagnostics, tokens, locator, indexer); + pycodestyle::rules::compound_statements( + &mut diagnostics, + tokens, + locator, + indexer, + source_type, + cell_offsets, + ); } if settings.rules.enabled(Rule::AvoidableEscapedQuote) && settings.flake8_quotes.avoid_escape { @@ -156,7 +166,7 @@ pub(crate) fn check_tokens( pyupgrade::rules::extraneous_parentheses(&mut diagnostics, tokens, locator); } - if is_stub && settings.rules.enabled(Rule::TypeCommentInStub) { + if source_type.is_stub() && settings.rules.enabled(Rule::TypeCommentInStub) { flake8_pyi::rules::type_comment_in_stub(&mut diagnostics, locator, indexer); } diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs index d14ef022217b2..f6382e8b30349 100644 --- a/crates/ruff_linter/src/linter.rs +++ b/crates/ruff_linter/src/linter.rs @@ -108,7 +108,8 @@ pub fn check_path( locator, indexer, settings, - source_type.is_stub(), + source_type, + source_kind.as_ipy_notebook().map(Notebook::cell_offsets), )); } diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs index 583e696a36b0c..112be16261155 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs @@ -53,6 +53,7 @@ mod tests { #[test_case(Rule::TrueFalseComparison, Path::new("E712.py"))] #[test_case(Rule::TypeComparison, Path::new("E721.py"))] #[test_case(Rule::UselessSemicolon, Path::new("E70.py"))] + #[test_case(Rule::UselessSemicolon, Path::new("E703.ipynb"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs index 6dcdd19c8b4ec..2b7434dd23c17 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs @@ -1,6 +1,8 @@ +use ruff_notebook::CellOffsets; +use ruff_python_ast::PySourceType; use ruff_python_parser::lexer::LexResult; use ruff_python_parser::Tok; -use ruff_text_size::TextRange; +use ruff_text_size::{TextRange, TextSize}; use ruff_diagnostics::{AlwaysFixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; @@ -101,6 +103,8 @@ pub(crate) fn compound_statements( lxr: &[LexResult], locator: &Locator, indexer: &Indexer, + source_type: PySourceType, + cell_offsets: Option<&CellOffsets>, ) { // Track the last seen instance of a variety of tokens. let mut colon = None; @@ -127,7 +131,13 @@ pub(crate) fn compound_statements( let mut sqb_count = 0u32; let mut brace_count = 0u32; - for &(ref tok, range) in lxr.iter().flatten() { + // Track indentation. + let mut indent = 0u32; + + // Keep the token iterator to perform lookaheads. + let mut tokens = lxr.iter().flatten(); + + while let Some(&(ref tok, range)) = tokens.next() { match tok { Tok::Lpar => { par_count = par_count.saturating_add(1); @@ -153,6 +163,12 @@ pub(crate) fn compound_statements( continue; } } + Tok::Indent => { + indent = indent.saturating_add(1); + } + Tok::Dedent => { + indent = indent.saturating_sub(1); + } _ => {} } @@ -163,15 +179,24 @@ pub(crate) fn compound_statements( match tok { Tok::Newline => { if let Some((start, end)) = semi { - let mut diagnostic = - Diagnostic::new(UselessSemicolon, TextRange::new(start, end)); - diagnostic.set_fix(Fix::safe_edit(Edit::deletion( - indexer - .preceded_by_continuations(start, locator) - .unwrap_or(start), - end, - ))); - diagnostics.push(diagnostic); + if !(source_type.is_ipynb() + && indent == 0 + && cell_offsets + .and_then(|cell_offsets| cell_offsets.containing_range(range.start())) + .is_some_and(|cell_range| { + !has_non_trivia_tokens_till(tokens.clone(), cell_range.end()) + })) + { + let mut diagnostic = + Diagnostic::new(UselessSemicolon, TextRange::new(start, end)); + diagnostic.set_fix(Fix::safe_edit(Edit::deletion( + indexer + .preceded_by_continuations(start, locator) + .unwrap_or(start), + end, + ))); + diagnostics.push(diagnostic); + } } // Reset. @@ -309,3 +334,23 @@ pub(crate) fn compound_statements( }; } } + +/// Returns `true` if there are any non-trivia tokens from the given token +/// iterator till the given end offset. +fn has_non_trivia_tokens_till<'a>( + tokens: impl Iterator, + cell_end: TextSize, +) -> bool { + for &(ref tok, tok_range) in tokens { + if tok_range.start() >= cell_end { + return false; + } + if !matches!( + tok, + Tok::Newline | Tok::Comment(_) | Tok::EndOfFile | Tok::NonLogicalNewline + ) { + return true; + } + } + false +} diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap new file mode 100644 index 0000000000000..068f245637b5c --- /dev/null +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E703_E703.ipynb.snap @@ -0,0 +1,46 @@ +--- +source: crates/ruff_linter/src/rules/pycodestyle/mod.rs +--- +E703.ipynb:5:2: E703 [*] Statement ends with an unnecessary semicolon + | +3 | x; +4 | # Only skip the last expression +5 | x; # E703 + | ^ E703 +6 | x; +7 | # Nested expressions isn't relevant + | + = help: Remove unnecessary semicolon + +ℹ Safe fix +2 2 | # Simple case +3 3 | x; +4 4 | # Only skip the last expression +5 |-x; # E703 + 5 |+x # E703 +6 6 | x; +7 7 | # Nested expressions isn't relevant +8 8 | if True: + +E703.ipynb:9:6: E703 [*] Statement ends with an unnecessary semicolon + | + 7 | # Nested expressions isn't relevant + 8 | if True: + 9 | x; + | ^ E703 +10 | # Semicolons with multiple expressions +11 | x; x; + | + = help: Remove unnecessary semicolon + +ℹ Safe fix +6 6 | x; +7 7 | # Nested expressions isn't relevant +8 8 | if True: +9 |- x; + 9 |+ x +10 10 | # Semicolons with multiple expressions +11 11 | x; x; +12 12 | # Comments, newlines and whitespace + +