diff --git a/Cargo.lock b/Cargo.lock index d7fa4e45487c8..499e533ed3729 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2264,10 +2264,12 @@ dependencies = [ name = "ruff_python_ast" version = "0.0.0" dependencies = [ + "aho-corasick", "bitflags 2.4.1", "insta", "is-macro", "itertools 0.12.0", + "once_cell", "ruff_python_parser", "ruff_python_trivia", "ruff_source_file", diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index d13a7a9270c0b..6267e5f138e77 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -197,7 +197,7 @@ impl<'a> Checker<'a> { let trailing_quote = trailing_quote(self.locator.slice(string_range))?; // Invert the quote character, if it's a single quote. - match *trailing_quote { + match trailing_quote { "'" => Some(Quote::Double), "\"" => Some(Quote::Single), _ => None, diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs index 2955417791428..2300c4b353eb2 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs @@ -298,7 +298,7 @@ fn try_convert_to_f_string( converted.push(']'); } FieldNamePart::StringIndex(index) => { - let quote = match *trailing_quote { + let quote = match trailing_quote { "'" | "'''" | "\"\"\"" => '"', "\"" => '\'', _ => unreachable!("invalid trailing quote"), diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml index 75a25062ee2cb..b61435355fa07 100644 --- a/crates/ruff_python_ast/Cargo.toml +++ b/crates/ruff_python_ast/Cargo.toml @@ -17,9 +17,11 @@ ruff_python_trivia = { path = "../ruff_python_trivia" } ruff_source_file = { path = "../ruff_source_file" } ruff_text_size = { path = "../ruff_text_size" } +aho-corasick = { workspace = true } bitflags = { workspace = true } is-macro = { workspace = true } itertools = { workspace = true } +once_cell = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, optional = true } smallvec = { workspace = true } diff --git a/crates/ruff_python_ast/src/str.rs b/crates/ruff_python_ast/src/str.rs index 8ddff85958994..1e05253f65cbb 100644 --- a/crates/ruff_python_ast/src/str.rs +++ b/crates/ruff_python_ast/src/str.rs @@ -1,3 +1,6 @@ +use aho_corasick::{AhoCorasick, AhoCorasickKind, Anchored, Input, MatchKind, StartKind}; +use once_cell::sync::Lazy; + use ruff_text_size::{TextLen, TextRange}; /// Includes all permutations of `r`, `u`, `f`, and `fr` (`ur` is invalid, as is `uf`). This @@ -124,18 +127,6 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[ "b'", ]; -#[rustfmt::skip] -const TRIPLE_QUOTE_SUFFIXES: &[&str] = &[ - "\"\"\"", - "'''", -]; - -#[rustfmt::skip] -const SINGLE_QUOTE_SUFFIXES: &[&str] = &[ - "\"", - "'", -]; - /// Strip the leading and trailing quotes from a string. /// Assumes that the string is a valid string literal, but does not verify that the string /// is a "simple" string literal (i.e., that it does not contain any implicit concatenations). @@ -155,28 +146,41 @@ pub fn raw_contents_range(contents: &str) -> Option { )) } +/// An [`AhoCorasick`] matcher for string and byte literal prefixes. +static PREFIX_MATCHER: Lazy = Lazy::new(|| { + AhoCorasick::builder() + .start_kind(StartKind::Anchored) + .match_kind(MatchKind::LeftmostLongest) + .kind(Some(AhoCorasickKind::DFA)) + .build( + TRIPLE_QUOTE_STR_PREFIXES + .iter() + .chain(TRIPLE_QUOTE_BYTE_PREFIXES) + .chain(SINGLE_QUOTE_STR_PREFIXES) + .chain(SINGLE_QUOTE_BYTE_PREFIXES), + ) + .unwrap() +}); + /// Return the leading quote for a string or byte literal (e.g., `"""`). pub fn leading_quote(content: &str) -> Option<&str> { - TRIPLE_QUOTE_STR_PREFIXES - .iter() - .chain(TRIPLE_QUOTE_BYTE_PREFIXES) - .chain(SINGLE_QUOTE_STR_PREFIXES) - .chain(SINGLE_QUOTE_BYTE_PREFIXES) - .find_map(|pattern| { - if content.starts_with(pattern) { - Some(*pattern) - } else { - None - } - }) + let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?; + Some(&content[mat.start()..mat.end()]) } /// Return the trailing quote string for a string or byte literal (e.g., `"""`). -pub fn trailing_quote(content: &str) -> Option<&&str> { - TRIPLE_QUOTE_SUFFIXES - .iter() - .chain(SINGLE_QUOTE_SUFFIXES) - .find(|&pattern| content.ends_with(pattern)) +pub fn trailing_quote(content: &str) -> Option<&str> { + if content.ends_with("'''") { + Some("'''") + } else if content.ends_with("\"\"\"") { + Some("\"\"\"") + } else if content.ends_with('\'') { + Some("'") + } else if content.ends_with('\"') { + Some("\"") + } else { + None + } } /// Return `true` if the string is a triple-quote string or byte prefix.