-
Notifications
You must be signed in to change notification settings - Fork 450
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit enables support for compiling regular expressions that can match on arbitrary byte slices. In particular, we add a new sub-module called `bytes` that duplicates the API of the top-level module, except `&str` for subjects is replaced by `&[u8]`. Additionally, Unicode support in the regular expression is disabled by default but can be selectively re-enabled with the `u` flag. (Unicode support cannot be selectively disabled in the standard top-level API.) Most of the interesting changes occurred in the `regex-syntax` crate, where the AST now explicitly distinguishes between "ASCII compatible" expressions and Unicode-aware expressions. This PR makes a few other changes out of convenience: 1. The DFA now knows how to "give up" if it's flushing its cache too often. When the DFA gives up, either backtracking or the NFA algorithm takes over, which provides better performance. 2. Benchmarks were added for Oniguruma. 3. The benchmarks in general were overhauled to be defined in one place by using conditional compilation. 4. The tests have been completely reorganized to make it easier to split up the tests depending on which regex engine we're using. For example, we occasionally need to be able to write tests specifically for `regex::Regex` or specifically for `regex::bytes::Regex`. 5. Fixes a bug where NUL bytes weren't represented correctly in the byte class optimization for the DFA. Closes #85.
- Loading branch information
1 parent
82bd6a8
commit d98ec1b
Showing
75 changed files
with
5,401 additions
and
1,915 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Benchmark harness for the regex crate. `publish = false` stops Cargo from | ||
# publishing this package to a registry. | ||
[package] | ||
publish = false | ||
name = "regex-benchmark" | ||
version = "0.1.0" | ||
authors = ["The Rust Project Developers"] | ||
license = "MIT/Apache-2.0" | ||
repository = "https://github.com/rust-lang/regex" | ||
# Points at the regex crate's documentation; the previous value linked to the | ||
# regex_syntax docs, which do not describe this package. | ||
documentation = "http://doc.rust-lang.org/regex/regex/index.html" | ||
homepage = "https://github.com/rust-lang/regex" | ||
description = "Regex benchmarks for Rust's and other engines." | ||
|
||
# onig, pcre and regex_macros are optional and are only pulled in by the | ||
# re-onig, re-pcre and re-rust-plugin features declared in [features]. | ||
# regex, regex_macros and regex-syntax are resolved through local `path` | ||
# entries relative to the parent directory. | ||
[dependencies] | ||
enum-set = "0.0.6" | ||
lazy_static = "0.1" | ||
onig = { version = "0.4", optional = true } | ||
pcre = { version = "0.2", optional = true } | ||
rand = "0.3" | ||
regex = { version = "0.1", path = ".." } | ||
regex_macros = { version = "0.1", path = "../regex_macros", optional = true } | ||
regex-syntax = { version = "0.2", path = "../regex-syntax" } | ||
|
||
# Use features to conditionally compile benchmarked regexes, since not every | ||
# regex works on every engine. :-( | ||
# re-pcre, re-onig and re-rust-plugin each enable the optional dependency they | ||
# list; re-rust and re-rust-bytes enable no extra dependencies. | ||
[features] | ||
re-pcre = ["pcre"] | ||
re-onig = ["onig"] | ||
re-rust = [] | ||
re-rust-bytes = [] | ||
re-rust-plugin = ["regex_macros"] | ||
|
||
# Run the benchmarks on the default behavior of Regex::new. | ||
# `test = false` / `bench = true`: this target is benchmarked by default but | ||
# not tested by default. | ||
# NOTE(review): presumably built with `--features re-rust` - confirm. | ||
[[bench]] | ||
name = "rust" | ||
path = "src/bench_rust.rs" | ||
test = false | ||
bench = true | ||
|
||
# Run the benchmarks on the default behavior of bytes::Regex::new. | ||
# `test = false` / `bench = true`: this target is benchmarked by default but | ||
# not tested by default. | ||
# NOTE(review): presumably built with `--features re-rust-bytes` - confirm. | ||
[[bench]] | ||
name = "rust-bytes" | ||
path = "src/bench_rust_bytes.rs" | ||
test = false | ||
bench = true | ||
|
||
# Run the benchmarks on the default behavior of the `regex!` compiler plugin. | ||
# `test = false` / `bench = true`: this target is benchmarked by default but | ||
# not tested by default. | ||
# NOTE(review): presumably built with `--features re-rust-plugin`, which | ||
# enables the optional regex_macros dependency - confirm. | ||
[[bench]] | ||
name = "rust-plugin" | ||
path = "src/bench_rust_plugin.rs" | ||
test = false | ||
bench = true | ||
|
||
# Run the benchmarks on PCRE. | ||
# `test = false` / `bench = true`: this target is benchmarked by default but | ||
# not tested by default. | ||
# NOTE(review): presumably built with `--features re-pcre`, which enables the | ||
# optional pcre dependency - confirm. | ||
[[bench]] | ||
name = "pcre" | ||
path = "src/bench_pcre.rs" | ||
test = false | ||
bench = true | ||
|
||
# Run the benchmarks on Oniguruma. | ||
# `test = false` / `bench = true`: this target is benchmarked by default but | ||
# not tested by default. | ||
# NOTE(review): presumably built with `--features re-onig`, which enables the | ||
# optional onig dependency - confirm. | ||
[[bench]] | ||
name = "onig" | ||
path = "src/bench_onig.rs" | ||
test = false | ||
bench = true | ||
|
||
# Include debug info in benchmark builds. | ||
# NOTE(review): presumably so benchmark binaries can be profiled - confirm. | ||
[profile.bench] | ||
debug = true | ||
|
||
# Include debug info in test builds. | ||
[profile.test] | ||
debug = true |
Oops, something went wrong.