Skip to content

Commit

Permalink
Use AhoCorasick to speed up quote match (#9773)
Browse files Browse the repository at this point in the history
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

When I was looking at the v0.2.0 release, this method showed up in a
CodSpeed regression (we were calling it more), so I decided to quickly
look at speeding it up. @BurntSushi suggested using Aho-Corasick, and it
looks like it's about 7 or 8x faster:

```text
Parser/AhoCorasick      time:   [8.5646 ns 8.5914 ns 8.6191 ns]
Parser/Iterator         time:   [64.992 ns 65.124 ns 65.271 ns]
```

## Test Plan

`cargo test`
  • Loading branch information
charliermarsh authored Feb 2, 2024
1 parent b947dde commit ea1c089
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 31 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ impl<'a> Checker<'a> {
let trailing_quote = trailing_quote(self.locator.slice(string_range))?;

// Invert the quote character, if it's a single quote.
match *trailing_quote {
match trailing_quote {
"'" => Some(Quote::Double),
"\"" => Some(Quote::Single),
_ => None,
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ fn try_convert_to_f_string(
converted.push(']');
}
FieldNamePart::StringIndex(index) => {
let quote = match *trailing_quote {
let quote = match trailing_quote {
"'" | "'''" | "\"\"\"" => '"',
"\"" => '\'',
_ => unreachable!("invalid trailing quote"),
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_python_ast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_source_file = { path = "../ruff_source_file" }
ruff_text_size = { path = "../ruff_text_size" }

aho-corasick = { workspace = true }
bitflags = { workspace = true }
is-macro = { workspace = true }
itertools = { workspace = true }
once_cell = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true, optional = true }
smallvec = { workspace = true }
Expand Down
62 changes: 33 additions & 29 deletions crates/ruff_python_ast/src/str.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use aho_corasick::{AhoCorasick, AhoCorasickKind, Anchored, Input, MatchKind, StartKind};
use once_cell::sync::Lazy;

use ruff_text_size::{TextLen, TextRange};

/// Includes all permutations of `r`, `u`, `f`, and `fr` (`ur` is invalid, as is `uf`). This
Expand Down Expand Up @@ -124,18 +127,6 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
"b'",
];

/// Closing delimiters for triple-quoted literals: `"""` and `'''`.
#[rustfmt::skip]
const TRIPLE_QUOTE_SUFFIXES: &[&str] = &[
"\"\"\"",
"'''",
];

/// Closing delimiters for single-quoted literals: `"` and `'`.
#[rustfmt::skip]
const SINGLE_QUOTE_SUFFIXES: &[&str] = &[
"\"",
"'",
];

/// Strip the leading and trailing quotes from a string.
/// Assumes that the string is a valid string literal, but does not verify that the string
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
Expand All @@ -155,28 +146,41 @@ pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
))
}

/// Lazily constructed [`AhoCorasick`] automaton covering every string and byte literal prefix.
///
/// The automaton is built from the four prefix tables (triple-quote and single-quote, string
/// and byte), supports anchored searches only, reports leftmost-longest matches, and is
/// compiled as a DFA. The `unwrap` panics on first access if the automaton cannot be built.
static PREFIX_MATCHER: Lazy<AhoCorasick> = Lazy::new(|| {
    // Feed all prefix tables into a single automaton.
    let prefixes = TRIPLE_QUOTE_STR_PREFIXES
        .iter()
        .chain(TRIPLE_QUOTE_BYTE_PREFIXES)
        .chain(SINGLE_QUOTE_STR_PREFIXES)
        .chain(SINGLE_QUOTE_BYTE_PREFIXES);

    let mut builder = AhoCorasick::builder();
    builder
        .start_kind(StartKind::Anchored)
        .match_kind(MatchKind::LeftmostLongest)
        .kind(Some(AhoCorasickKind::DFA));
    builder.build(prefixes).unwrap()
});

/// Return the leading quote for a string or byte literal (e.g., `"""`).
pub fn leading_quote(content: &str) -> Option<&str> {
TRIPLE_QUOTE_STR_PREFIXES
.iter()
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
.chain(SINGLE_QUOTE_STR_PREFIXES)
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
.find_map(|pattern| {
if content.starts_with(pattern) {
Some(*pattern)
} else {
None
}
})
let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?;
Some(&content[mat.start()..mat.end()])
}

/// Return the trailing quote string for a string or byte literal (e.g., `"""`).
pub fn trailing_quote(content: &str) -> Option<&&str> {
TRIPLE_QUOTE_SUFFIXES
.iter()
.chain(SINGLE_QUOTE_SUFFIXES)
.find(|&pattern| content.ends_with(pattern))
/// Return the closing quote sequence that `content` ends with, if any.
///
/// Yields `'''` or `"""` when the input ends in a triple quote, otherwise `'` or `"` when it
/// ends in a single quote character, and `None` when it ends in neither.
pub fn trailing_quote(content: &str) -> Option<&str> {
    // Both quote characters are ASCII, so inspecting the trailing bytes is equivalent to
    // `str::ends_with`. Triple-quote arms come first so they win over the single-quote arms.
    match content.as_bytes() {
        [.., b'\'', b'\'', b'\''] => Some("'''"),
        [.., b'"', b'"', b'"'] => Some("\"\"\""),
        [.., b'\''] => Some("'"),
        [.., b'"'] => Some("\""),
        _ => None,
    }
}

/// Return `true` if the string is a triple-quote string or byte prefix.
Expand Down

0 comments on commit ea1c089

Please sign in to comment.