Skip to content

Commit

Permalink
Auto merge of #343 - BurntSushi:fixes, r=BurntSushi
Browse files Browse the repository at this point in the history
Fixes

This PR contains a series of commits that fixes several minor bugs.

Fixes #321, Fixes #334, Fixes #326, Fixes #333, Fixes #338
  • Loading branch information
bors committed Feb 18, 2017
2 parents 7dfa895 + 9ae9418 commit 7297f23
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 15 deletions.
8 changes: 5 additions & 3 deletions regex-syntax/src/literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
let n = cmp::min(lits.limit_size, min as usize);
let es = iter::repeat(e.clone()).take(n).collect();
f(&Concat(es), lits);
if n < min as usize {
if n < min as usize || lits.contains_empty() {
lits.cut();
}
}
Expand Down Expand Up @@ -1156,8 +1156,9 @@ mod tests {

// Test regexes with empty assertions.
test_lit!(pfx_empty1, prefixes, "^a", M("a"));
test_lit!(pfx_empty2, prefixes, "^abc", M("abc"));
test_lit!(pfx_empty3, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
test_lit!(pfx_empty2, prefixes, "a${2}", C("a"));
test_lit!(pfx_empty3, prefixes, "^abc", M("abc"));
test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));

// Make sure some curious regexes have no prefixes.
test_lit!(pfx_nothing1, prefixes, ".");
Expand Down Expand Up @@ -1306,6 +1307,7 @@ mod tests {

// Test regexes with empty assertions.
test_lit!(sfx_empty1, suffixes, "a$", M("a"));
test_lit!(sfx_empty2, suffixes, "${2}a", C("a"));

// Make sure some curious regexes have no suffixes.
test_lit!(sfx_nothing1, suffixes, ".");
Expand Down
9 changes: 6 additions & 3 deletions src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -850,9 +850,12 @@ impl<'c> ExecNoSync<'c> {
match_start: usize,
match_end: usize,
) -> Option<(usize, usize)> {
// We can't use match_end directly, because we may need to examine
// one "character" after the end of a match for lookahead operators.
let e = cmp::min(next_utf8(text, match_end), text.len());
// We can't use match_end directly, because we may need to examine one
// "character" after the end of a match for lookahead operators. We
// need to move two characters beyond the end, since some look-around
// operations may falsely assume a premature end of text otherwise.
let e = cmp::min(
next_utf8(text, next_utf8(text, match_end)), text.len());
self.captures_nfa(slots, &text[..e], match_start)
}

Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
# }
```
If you wish to match against whitespace in this mode, you can still use `\s`,
`\n`, `\t`, etc. For escaping a single space character, you can use its hex
character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.
# Example: match multiple regular expressions simultaneously
This demonstrates how to use a `RegexSet` to match multiple (possibly
Expand Down
4 changes: 0 additions & 4 deletions src/re_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@ impl RegexBuilder {
}

/// Set the value for the Unicode (`u`) flag.
///
/// For byte based regular expressions, this is disabled by default.
pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
self.0.unicode = yes;
self
Expand Down Expand Up @@ -228,8 +226,6 @@ impl RegexSetBuilder {
}

/// Set the value for the Unicode (`u`) flag.
///
/// For byte based regular expressions, this is disabled by default.
pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.0.unicode = yes;
self
Expand Down
37 changes: 32 additions & 5 deletions src/re_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,23 @@ impl Regex {
/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
/// would produce the same result. To write a literal `$` use `$$`.
///
/// If `$name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
/// Sometimes the replacement string requires use of curly braces to
/// delineate a capture group replacement and surrounding literal text.
/// For example, if we wanted to join two words together with an
/// underscore:
///
/// The longest possible name is used. e.g., `$1a` looks up the capture
/// group named `1a` and not the capture group at index `1`. To exert more
/// precise control over the name, use braces, e.g., `${1}a`.
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
/// assert_eq!(result, &b"deep_fried"[..]);
/// # }
/// ```
///
/// Without the curly braces, the capture group name `first_` would be
/// used, and since it doesn't exist, it would be replaced with the empty
/// string.
///
/// Finally, sometimes you just want to replace a literal string with no
/// regard for capturing group expansion. This can be done by wrapping a
Expand Down Expand Up @@ -778,6 +789,22 @@ impl<'t> Captures<'t> {
/// Looks up the capture group at index `i` and returns its match.
///
/// `None` is returned when `i` does not correspond to a capture group, or
/// when that capture group did not participate in the match.
///
/// # Examples
///
/// Get the text of the match with a default of an empty string if this
/// group didn't participate in the match:
///
/// ```rust
/// # use regex::bytes::Regex;
/// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
/// let caps = re.captures(b"abc123").unwrap();
///
/// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes());
/// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes());
/// assert_eq!(text1, &b"123"[..]);
/// assert_eq!(text2, &b""[..]);
/// ```
pub fn get(&self, i: usize) -> Option<Match<'t>> {
    match self.locs.pos(i) {
        // A position pair is available: pair it with the searched text.
        Some((start, end)) => Some(Match::new(self.text, start, end)),
        None => None,
    }
}
Expand Down
34 changes: 34 additions & 0 deletions src/re_unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,24 @@ impl Regex {
/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
/// would produce the same result. To write a literal `$` use `$$`.
///
/// Sometimes the replacement string requires use of curly braces to
/// delineate a capture group replacement and surrounding literal text.
/// For example, if we wanted to join two words together with an
/// underscore:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace("deep fried", "${first}_$second");
/// assert_eq!(result, "deep_fried");
/// # }
/// ```
///
/// Without the curly braces, the capture group name `first_` would be
/// used, and since it doesn't exist, it would be replaced with the empty
/// string.
///
/// Finally, sometimes you just want to replace a literal string with no
/// regard for capturing group expansion. This can be done by wrapping a
/// string with `NoExpand`:
Expand Down Expand Up @@ -916,6 +934,22 @@ impl<'t> Captures<'t> {
/// Looks up the capture group at index `i` and returns its match.
///
/// `None` is returned when `i` does not correspond to a capture group, or
/// when that capture group did not participate in the match.
///
/// # Examples
///
/// Get the text of the match with a default of an empty string if this
/// group didn't participate in the match:
///
/// ```rust
/// # use regex::Regex;
/// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
/// let caps = re.captures("abc123").unwrap();
///
/// let text1 = caps.get(1).map_or("", |m| m.as_str());
/// let text2 = caps.get(2).map_or("", |m| m.as_str());
/// assert_eq!(text1, "123");
/// assert_eq!(text2, "");
/// ```
pub fn get(&self, i: usize) -> Option<Match<'t>> {
    match self.locs.pos(i) {
        // A position pair is available: pair it with the searched text.
        Some((start, end)) => Some(Match::new(self.text, start, end)),
        None => None,
    }
}
Expand Down
8 changes: 8 additions & 0 deletions tests/regression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,11 @@ mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4)));
mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4)));
mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4)));
mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));

// See: https://github.com/rust-lang/regex/issues/321
ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);

// See: https://github.com/rust-lang/regex/issues/334
mat!(captures_after_dfa_premature_end, r"a(b*(X|$))?", "abcbX",
Some((0, 1)), None, None);

0 comments on commit 7297f23

Please sign in to comment.