Skip to content

Commit

Permalink
Fix tidy and rebase fallout
Browse files Browse the repository at this point in the history
Added a few bugfixes and additional testcases
  • Loading branch information
Kimundi committed Feb 19, 2015
1 parent c1de0a0 commit a641996
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 50 deletions.
33 changes: 0 additions & 33 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2893,22 +2893,6 @@ mod bench {
b.iter(|| assert_eq!(s.split('V').count(), 3));
}

// Benchmark: counts the pieces produced by splitting a mostly non-ASCII
// string on 'V', using a custom `CharEq` matcher instead of a plain `char`.
#[bench]
fn split_unicode_not_ascii(b: &mut Bencher) {
// Wraps a single `char`; `matches` compares against the wrapped char, and
// `only_ascii` always answers `false`.
struct NotAscii(char);
impl CharEq for NotAscii {
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
// NOTE(review): presumably this keeps `split` off any ASCII-only fast
// path so that the general path is what gets measured -- confirm
// against the `split` implementation.
fn only_ascii(&self) -> bool { false }
}
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

// 'V' occurs twice in `s`, so the split is expected to yield three pieces.
b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
}


#[bench]
fn split_ascii(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb.";
Expand All @@ -2917,23 +2901,6 @@ mod bench {
b.iter(|| assert_eq!(s.split(' ').count(), len));
}

// Benchmark: splits an ASCII sentence on ' ' through a custom `CharEq`
// matcher and checks that the piece count equals the one obtained with a
// plain `char` separator.
#[bench]
fn split_not_ascii(b: &mut Bencher) {
// Wraps a single `char`; `matches` compares against the wrapped char, and
// `only_ascii` always answers `false`.
struct NotAscii(char);
impl CharEq for NotAscii {
#[inline]
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
// NOTE(review): presumably this keeps `split` off any ASCII-only fast
// path even though the separator is ASCII -- confirm against the
// `split` implementation.
fn only_ascii(&self) -> bool { false }
}
let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count, computed once outside the timed closure with a plain `char`.
let len = s.split(' ').count();

b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
}

#[bench]
fn split_extern_fn(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb.";
Expand Down
15 changes: 8 additions & 7 deletions src/libcore/str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ impl FromStr for bool {

/// An error returned when parsing a `bool` from a string fails.
#[derive(Debug, Clone, PartialEq)]
#[allow(missing_copy_implementations)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct ParseBoolError { _priv: () }

Expand Down Expand Up @@ -235,7 +234,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
pub unsafe fn from_c_str(s: *const i8) -> &'static str {
let s = s as *const u8;
let mut len = 0;
while *s.offset(len as int) != 0 {
while *s.offset(len as isize) != 0 {
len += 1;
}
let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
Expand All @@ -258,7 +257,7 @@ impl CharEq for char {
fn matches(&mut self, c: char) -> bool { *self == c }

#[inline]
fn only_ascii(&self) -> bool { (*self as usize) < 128 }
fn only_ascii(&self) -> bool { (*self as u32) < 128 }
}

impl<F> CharEq for F where F: FnMut(char) -> bool {
Expand Down Expand Up @@ -764,7 +763,8 @@ impl TwoWaySearcher {
// How far we can jump when we encounter a mismatch is all based on the fact
// that (u, v) is a critical factorization for the needle.
#[inline]
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(usize, usize)> {
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
-> Option<(usize, usize)> {
'search: loop {
// Check that we have room to search in
if self.position + needle.len() > haystack.len() {
Expand Down Expand Up @@ -955,6 +955,7 @@ Section: Comparing strings
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
#[inline]
fn eq_slice_(a: &str, b: &str) -> bool {
// NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
#[allow(improper_ctypes)]
extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
a.len() == b.len() && unsafe {
Expand Down Expand Up @@ -1489,7 +1490,7 @@ impl StrExt for str {
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: DoubleEndedSearcher<'a> {
let mut i = 0;
let mut j = self.len();
let mut j = 0;
let mut matcher = pat.into_searcher(self);
if let Some((a, b)) = matcher.next_reject() {
i = a;
Expand All @@ -1507,7 +1508,7 @@ impl StrExt for str {

#[inline]
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
let mut i = 0;
let mut i = self.len();
let mut matcher = pat.into_searcher(self);
if let Some((a, _)) = matcher.next_reject() {
i = a;
Expand All @@ -1521,7 +1522,7 @@ impl StrExt for str {
#[inline]
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: ReverseSearcher<'a> {
let mut j = self.len();
let mut j = 0;
let mut matcher = pat.into_searcher(self);
if let Some((_, b)) = matcher.next_reject_back() {
j = b;
Expand Down
27 changes: 19 additions & 8 deletions src/libcore/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pub trait Pattern<'a>: Sized {

// Searcher

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum SearchStep {
Match(usize, usize),
Reject(usize, usize),
Expand Down Expand Up @@ -190,7 +191,7 @@ impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}

// Impl for &str

// TODO: Optimize the naive implementation here
// Todo: Optimize the naive implementation here

#[derive(Clone)]
pub struct StrSearcher<'a, 'b> {
Expand Down Expand Up @@ -235,13 +236,16 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
},
|m: &mut StrSearcher| {
// Forward step for nonempty needle
let possible_match = &m.haystack[m.start .. m.start + m.needle.len()];
// Compare if bytes are equal
let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
let current_start = m.start;
if possible_match == m.needle {
if possible_match == m.needle.as_bytes() {
m.start += m.needle.len();
SearchStep::Match(current_start, m.start)
} else {
m.start += possible_match.chars().next().unwrap().len_utf8();
// Skip a char
let haystack_suffix = &m.haystack[m.start..];
m.start += haystack_suffix.chars().next().unwrap().len_utf8();
SearchStep::Reject(current_start, m.start)
}
})
Expand All @@ -262,13 +266,16 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
},
|m: &mut StrSearcher| {
// Backward step for nonempty needle
let possible_match = &m.haystack[m.end - m.needle.len() .. m.end];
// Compare if bytes are equal
let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
let current_end = m.end;
if possible_match == m.needle {
if possible_match == m.needle.as_bytes() {
m.end -= m.needle.len();
SearchStep::Match(m.end, current_end)
} else {
m.end -= possible_match.chars().rev().next().unwrap().len_utf8();
// Skip a char
let haystack_prefix = &m.haystack[..m.end];
m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
SearchStep::Reject(m.end, current_end)
}
})
Expand All @@ -290,6 +297,9 @@ where F: FnOnce(&mut StrSearcher) -> SearchStep,
} else if m.start + m.needle.len() <= m.end {
// Case for needle != ""
g(&mut m)
} else if m.start < m.end {
m.done = true;
SearchStep::Reject(m.start, m.end)
} else {
m.done = true;
SearchStep::Done
Expand Down Expand Up @@ -352,7 +362,8 @@ impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {

use ops::Deref;

impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T where &'b P: Pattern<'a> {
impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T
where &'b P: Pattern<'a> {
type Searcher = <&'b P as Pattern<'a>>::Searcher;
associated_items!(<&'b P as Pattern<'a>>::Searcher,
s, (&**s));
Expand Down
146 changes: 144 additions & 2 deletions src/libcoretest/str.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
Expand Down Expand Up @@ -139,8 +139,150 @@ fn test_utf16_code_units() {
vec![0xE9, 0xD83D, 0xDCA9])
}

// A haystack that opens with a three-byte character must simply report "no
// match" for a two-byte ASCII prefix, even though the prefix length ends in
// the middle of that first character.
#[test]
fn starts_with_in_unicode() {
    let haystack = "├── Cargo.toml";
    let prefix = "# ";
    assert!(!haystack.starts_with(prefix));
}

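// Developer note: the command below deletes the stage1 coretest binary and then
// runs `make check-stage1-coretest` restricted to `str::bench` with PLEASE_BENCH=1:
// rm x86_64-unknown-linux-gnu/stage1/test/coretesttest-x86_64-unknown-linux-gnu; env PLEASE_BENCH=1 make check-stage1-coretest TESTNAME=str::bench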
// `starts_with` across haystacks that are shorter than, as long as, and longer
// than the prefix, with and without a multi-byte character in either string.
#[test]
fn starts_short_long() {
    // (haystack, prefix, expected result of `haystack.starts_with(prefix)`)
    let cases = [
        ("", "##", false),
        ("##", "####", false),
        ("####", "##", true),
        ("##ä", "####", false),
        ("####ä", "##", true),
        ("##", "####ä", false),
        ("##ä##", "##ä", true),
    ];
    for &(haystack, prefix, expected) in cases.iter() {
        assert!(haystack.starts_with(prefix) == expected,
                "{:?}.starts_with({:?}) should be {}", haystack, prefix, expected);
    }

    // The empty prefix is a prefix of every string, including the empty one.
    let haystacks = ["", "ä", "#ä", "##ä", "ä###", "#ä##", "##ä#"];
    for haystack in haystacks.iter() {
        assert!(haystack.starts_with(""),
                "{:?} should start with the empty string", haystack);
    }
}

// `contains_char` on a short ASCII haystack: one char that is present, one
// ASCII char that is absent, and one absent char outside the ASCII range.
#[test]
fn contains_weird_cases() {
    let haystack = "* \t";

    assert!(haystack.contains_char(' '));

    let absent = ['?', '\u{1F4A9}'];
    for c in absent.iter() {
        assert!(!haystack.contains_char(*c));
    }
}

// Whitespace trimming from the left, from the right and from both ends, on a
// string with content in the middle and on a string that is whitespace only.
#[test]
fn trim_ws() {
    // Shared predicate handed to every trim call below.
    fn is_ws(c: char) -> bool { c.is_whitespace() }

    let padded = " \t a \t ";
    assert_eq!(padded.trim_left_matches(is_ws), "a \t ");
    assert_eq!(padded.trim_right_matches(is_ws), " \t a");
    assert_eq!(padded.trim_matches(is_ws), "a");

    // Nothing but whitespace: every variant must come back empty.
    let blank = " \t \t ";
    assert_eq!(blank.trim_left_matches(is_ws), "");
    assert_eq!(blank.trim_right_matches(is_ws), "");
    assert_eq!(blank.trim_matches(is_ws), "");
}

// Step-by-step tests for string searchers: each case lists the exact sequence
// of `Match`/`Reject` steps a searcher is expected to report for a given
// pattern and haystack, and checks it both front-to-back and back-to-front.
mod pattern {
use std::str::Pattern;
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use std::str::SearchStep::{self, Match, Reject, Done};

// make_test!(name, pattern, haystack, [expected steps,]) expands to a module
// `name` holding two tests: `fwd` drives the searcher with `next()` and `bwd`
// drives it with `next_back()`. Both compare against the same expected list.
macro_rules! make_test {
($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
mod $name {
// NOTE(review): presumably repeated here so that `Match`/`Reject` in the
// expected-step expressions resolve inside the generated module -- confirm.
use std::str::Pattern;
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use std::str::SearchStep::{self, Match, Reject, Done};
use super::{cmp_search_to_vec};
#[test]
fn fwd() {
cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
}
#[test]
fn bwd() {
cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
}
}
}
}

// Runs the searcher built from `pat` over `haystack` until it reports `Done`,
// recording every `Match`/`Reject` step, then asserts that the recorded steps
// equal `right`.
//
// `rev == false` pulls steps with `next()`; `rev == true` pulls them with
// `next_back()` and reverses the recorded list before comparing, so `right`
// is always written in front-to-back order.
fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
right: Vec<SearchStep>)
where P::Searcher: ReverseSearcher<'a>
{
let mut searcher = pat.into_searcher(haystack);
let mut v = vec![];
loop {
match if !rev {searcher.next()} else {searcher.next_back()} {
Match(a, b) => v.push(Match(a, b)),
Reject(a, b) => v.push(Reject(a, b)),
Done => break,
}
}
if rev {
// Steps were collected back-to-front; restore front-to-back order.
v.reverse();
}
assert_eq!(v, right);
}

// &str patterns.
make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
Reject(0, 1),
Match (1, 3),
Reject(3, 4),
Match (4, 6),
Reject(6, 7),
]);
// An empty needle is expected to match at every position, including the end.
make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
Match(0, 0),
Match(1, 1),
Match(2, 2),
Match(3, 3),
Match(4, 4),
Match(5, 5),
Match(6, 6),
Match(7, 7),
]);
// NOTE(review): `mulibyte` in this and two later test names looks like a typo
// for `multibyte`.
// The expected offsets advance three bytes at a time, one step per character.
make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
Match(0, 0),
Match(3, 3),
Match(6, 6),
Match(9, 9),
]);
make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
Match(0, 0),
]);
// A non-empty needle on an empty haystack is expected to report no steps at all.
make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [
]);
// char patterns.
make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
Reject(0, 1),
Match (1, 2),
Match (2, 3),
Reject(3, 4),
Match (4, 5),
Match (5, 6),
Reject(6, 7),
]);
make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
// The needle is a non-ASCII char; the haystack is three one-byte characters.
make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
Reject(0, 1),
Reject(1, 2),
Reject(2, 3),
]);

}

mod bench {
macro_rules! make_test_inner {
Expand Down

0 comments on commit a641996

Please sign in to comment.