Skip to content

Commit

Permalink
Add benchmarks for D's ctRegex
Browse files Browse the repository at this point in the history
  • Loading branch information
mrmonday authored and BurntSushi committed Jan 1, 2018
1 parent 49f2a3d commit ed174df
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 2 deletions.
2 changes: 2 additions & 0 deletions bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ re-onig = ["onig"]
re-re2 = []
re-dphobos = []
re-dphobos-dmd = ["re-dphobos"]
re-dphobos-dmd-ct = ["re-dphobos-dmd"]
re-dphobos-ldc = ["re-dphobos"]
re-dphobos-ldc-ct = ["re-dphobos-ldc"]
re-rust = []
re-rust-bytes = []
re-tcl = []
Expand Down
11 changes: 11 additions & 0 deletions bench/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,19 @@ fn main() {

let out_dir = env::var("OUT_DIR").unwrap();
let out_file = &format!("-of={}/libdphobos-dmd.a", out_dir);
let is_compile_time = env::var("CARGO_FEATURE_RE_DPHOBOS_DMD_CT").is_ok();
let extra_args = if is_compile_time { vec!["-version=CtRegex"] } else { vec![] };

let res = process::Command::new("dmd")
.arg("-w")
.arg("-lib")
.arg("-O")
.arg("-release")
.arg("-inline")
.arg("-Isrc/ffi")
.args(extra_args)
.arg("src/ffi/d_phobos.d")
.arg("src/ffi/d_phobos_ct.d")
.arg(out_file)
.output()
.expect("unable to compile dphobos-regex (dmd)");
Expand All @@ -75,13 +80,19 @@ fn main() {
let out_dir = env::var("OUT_DIR").unwrap();
let out_file = &format!("-of={}/libdphobos-ldc.a", out_dir);

let is_compile_time = env::var("CARGO_FEATURE_RE_DPHOBOS_LDC_CT").is_ok();
let extra_args = if is_compile_time { vec!["-d-version=CtRegex"] } else { vec![] };

let res = process::Command::new("ldc")
.arg("-w")
.arg("-lib")
.arg("-O3")
.arg("-release")
.arg("-mcpu=native")
.arg("-Isrc/ffi")
.args(extra_args)
.arg("src/ffi/d_phobos.d")
.arg("src/ffi/d_phobos_ct.d")
.arg(out_file)
.output()
.expect("unable to compile dphobos-regex (ldc)");
Expand Down
8 changes: 7 additions & 1 deletion bench/run
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# Print the accepted engine names to stderr and exit with status 1.
usage() {
# NOTE(review): two usage lines are visible here; the shorter one (without the
# "-ct" variants) looks like the pre-change line of the diff — confirm that
# only the longer one is present in the actual script.
echo "Usage: $(basename $0) [dphobos-dmd | dphobos-ldc | rust | rust-bytes | pcre1 | pcre2 | re2 | onig | tcl ]" >&2
echo "Usage: $(basename $0) [dphobos-dmd | dphobos-ldc | dphobos-dmd-ct | dphobos-ldc-ct | rust | rust-bytes | pcre1 | pcre2 | re2 | onig | tcl ]" >&2
exit 1
}

Expand All @@ -23,6 +23,12 @@ case $which in
dphobos-ldc)
exec cargo bench --bench bench --features re-dphobos-ldc "$@"
;;
dphobos-dmd-ct)
exec cargo bench --bench bench --features re-dphobos-dmd-ct "$@"
;;
dphobos-ldc-ct)
exec cargo bench --bench bench --features re-dphobos-ldc-ct "$@"
;;
rust)
exec cargo bench --bench bench --features re-rust "$@"
;;
Expand Down
9 changes: 8 additions & 1 deletion bench/src/ffi/d_phobos.d
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import std.typecons;

import std.stdio;

import d_phobos_ct;

auto rustRegexToD(string regex) {
auto flags = "g";
if (regex.startsWith("(?i)")) {
Expand All @@ -27,7 +29,12 @@ extern(C):
// Allocates storage for a Regex!char with malloc, fills it with the regex for
// pattern `s`, and returns the allocation as an untyped pointer.
// NOTE(review): the result of malloc is dereferenced without a null check.
void* d_phobos_regex_new(string s) {
auto r = cast(Regex!char*)malloc(Regex!char.sizeof);

// NOTE(review): this unconditional assignment duplicates the `else` branch
// below; it looks like the pre-change line of the diff — confirm it is not
// present in the actual file, otherwise every pattern is compiled at run time
// before being overwritten in CtRegex builds.
*r = regex(rustRegexToD(s).expand);
version(CtRegex) {
// Compile-time build: look the pattern up by its original (untranslated)
// string in the table returned by getCtRegex(). The table is obtained anew
// on every call. Presumably `s` must be one of the entries in PATTERNS for
// the lookup to succeed — verify against the callers.
auto ctR = getCtRegex();
*r = ctR[s];
} else {
// Run-time build: translate the pattern with rustRegexToD and compile it
// with std.regex's regex().
*r = regex(rustRegexToD(s).expand);
}

return r;
}
Expand Down
103 changes: 103 additions & 0 deletions bench/src/ffi/d_phobos_ct.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
module d_phobos_ct;

version(CtRegex):

// Patterns that getCtRegex() turns into compile-time regexes.
// getCtRegex() uses each string exactly as written here as the lookup key, and
// strips a leading "(?i)" or "(?m)" before compilation, passing it as the "i"
// or "m" flag instead. The trailing comment on each entry names the
// benchmark(s) the pattern appears to belong to.
// Note: the misc::reallyhard2 and sherlock::before_holmes entries are the same
// string, so they share a single key in the resulting table.
static immutable PATTERNS = [
r"y", // misc::literal
r".y", // misc::not_literal
"[abcdw]", // misc::match_class
"[ac]", // misc::match_class_in_range
r"\p{L}", // misc::match_class_unicode / sherlock::letters
r"^zbc(d|e)", // misc::anchored_literal_long_non_match / misc::anchored_literal_short_non_match
r"^.bc(d|e)", // misc::anchored_literal_short_match / misc::anchored_literal_long_match
r"^.bc(d|e)*$", // misc::one_pass_short
r".bc(d|e)*$", // misc::one_pass_short_not
r"^abcdefghijklmnopqrstuvwxyz.*$", // misc::one_pass_long_prefix
r"^.bcdefghijklmnopqrstuvwxyz.*$", // misc::one_pass_long_prefix_not
r"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", // misc::long_needle1
r"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbba", // misc::long_needle2
r"[r-z].*bcdefghijklmnopq", // misc::reverse_suffix_no_quadratic
"ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::easy0
r"A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$", // misc::easy1
r"[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::medium
r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::hard
r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ.*", // misc::reallyhard
r"\w+\s+Holmes", // misc::reallyhard2
// This causes compile times to go from ~40s to ~9m with dmd 2.077.1
//r"a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // misc::no_exponential
r">[^\n]*\n|\n", // dna::find_new_lines
r"agggtaaa|tttaccct", // dna::variant1
r"[cgt]gggtaaa|tttaccc[acg]", // dna::variant2
r"a[act]ggtaaa|tttacc[agt]t", // dna::variant3
r"ag[act]gtaaa|tttac[agt]ct", // dna::variant4
r"agg[act]taaa|ttta[agt]cct", // dna::variant5
r"aggg[acg]aaa|ttt[cgt]ccct", // dna::variant6
r"agggt[cgt]aa|tt[acg]accct", // dna::variant7
r"agggta[cgt]a|t[acg]taccct", // dna::variant8
r"agggtaa[cgt]|[acg]ttaccct", // dna::variant9
r"B", // dna::subst1
r"D", // dna::subst2
r"H", // dna::subst3
r"K", // dna::subst4
r"M", // dna::subst5
r"N", // dna::subst6
r"R", // dna::subst7
r"S", // dna::subst8
r"V", // dna::subst9
r"W", // dna::subst10
r"Y", // dna::subst11
r"Sherlock", // sherlock::name_sherlock
r"Holmes", // sherlock::name_holmes
r"Sherlock Holmes", // sherlock::name_sherlock_holmes
r"(?i)Sherlock", // sherlock::name_sherlock_nocase
r"(?i)Holmes", // sherlock::name_holmes_nocase
r"(?i)Sherlock Holmes", // sherlock::name_sherlock_holmes_nocase
r"Sherlock\s+Holmes", // sherlock::name_whitespace
r"Sherlock|Street", // sherlock::name_alt1
r"Sherlock|Holmes", // sherlock::name_alt2
r"Sherlock|Holmes|Watson|Irene|Adler|John|Baker", // sherlock::name_alt3
r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker", // sherlock::name_alt3_nocase
r"Sher[a-z]+|Hol[a-z]+", // sherlock::name_alt4
r"(?i)Sher[a-z]+|Hol[a-z]+", // sherlock::name_alt4_nocase
r"Sherlock|Holmes|Watson", // sherlock::name_alt5
r"(?i)Sherlock|Holmes|Watson", // sherlock::name_alt5_nocase
r"zqj", // sherlock::no_match_uncommon
r"aqj", // sherlock::no_match_common
r"aei", // sherlock::no_match_really_common
r"the", // sherlock::the_lower
r"The", // sherlock::the_upper
r"(?i)the", // sherlock::the_nocase
r"the\s+\w+", // sherlock::the_whitespace
r"\p{Lu}", // sherlock::letters_upper
r"\p{Ll}", // sherlock::letters_lower
r"\w+", // sherlock::words
r"\w+\s+Holmes", // sherlock::before_holmes
r"\w+\s+Holmes\s+\w+", // sherlock::before_after_holmes
r"Holmes.{0,25}Watson|Watson.{0,25}Holmes", // sherlock::holmes_cochar_watson
r"Holmes(?:\s*.+\s*){0,10}Watson|Watson(?:\s*.+\s*){0,10}Holmes", // sherlock::holmes_coword_watson
`["'][^"']{0,30}[?!.]["']`, // sherlock::quotes
r"(?m)^Sherlock Holmes|Sherlock Holmes$", // sherlock::line_boundary_sherlock_holmes
r"\b\w+n\b", // sherlock::word_ending_n
r"[a-q][^u-z]{13}x", // sherlock::repeated_class_negation
r"[a-zA-Z]+ing", // sherlock::ing_suffix
r"\s[a-zA-Z]{0,12}ing\s", // sherlock::ing_suffix_limited_space
];

// Builds and returns an associative array that maps every entry of PATTERNS
// (keyed by the original, unmodified pattern string) to its ctRegex.
//
// A leading "(?i)" or "(?m)" is cut off the pattern text and expressed through
// the "i" or "m" flag instead; the "g" flag is always set.
public auto getCtRegex() {
    import std.regex;
    import std.string;

    Regex!char[string] table;

    // The inner braces give each unrolled iteration its own scope, so the
    // per-pattern enums below do not clash with one another.
    static foreach (pat; PATTERNS) {{
        static if (pat.startsWith("(?i)")) {
            enum reFlags = "gi";
            enum reSource = pat[4 .. $];
        } else static if (pat.startsWith("(?m)")) {
            enum reFlags = "gm";
            enum reSource = pat[4 .. $];
        } else {
            enum reFlags = "g";
            enum reSource = pat;
        }

        table[pat] = ctRegex!(reSource, reFlags);
    }}

    return table;
}
2 changes: 2 additions & 0 deletions bench/src/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ use {Regex, Text};
#[cfg(not(feature = "re-onig"))]
#[cfg(not(feature = "re-pcre1"))]
#[cfg(not(feature = "re-pcre2"))]
#[cfg(not(feature = "re-dphobos-dmd-ct"))]
#[cfg(not(feature = "re-dphobos-ldc-ct"))]
bench_match!(no_exponential, {
format!(
"{}{}",
Expand Down

0 comments on commit ed174df

Please sign in to comment.