From 647371e8cda2475a081825cf5c3fcb5379b51585 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Fri, 29 Jun 2018 11:01:56 +0100 Subject: [PATCH 01/14] sample_indices: revise benchmarks (pre-optimisation) --- benches/seq.rs | 56 ++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/benches/seq.rs b/benches/seq.rs index 260e2334a41..d844ab7f599 100644 --- a/benches/seq.rs +++ b/benches/seq.rs @@ -1,4 +1,5 @@ #![feature(test)] +#![allow(non_snake_case)] extern crate test; extern crate rand; @@ -27,28 +28,31 @@ fn seq_slice_choose_1_of_1000(b: &mut Bencher) { }) } -#[bench] -fn seq_slice_choose_multiple_1_of_1000(b: &mut Bencher) { - let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); - let x : &[usize] = &[1; 1000]; - b.iter(|| { - x.choose_multiple(&mut rng, 1).cloned().next() - }) -} - -#[bench] -fn seq_slice_choose_multiple_10_of_100(b: &mut Bencher) { - let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); - let x : &[usize] = &[1; 100]; - let mut buf = [0; 10]; - b.iter(|| { - for (v, slot) in x.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { - *slot = *v; +macro_rules! seq_slice_choose_multiple { + ($name:ident, $amount:expr, $length:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &[i32] = &[$amount; $length]; + let mut result = [0i32; $amount]; + b.iter(|| { + // Collect full result to prevent unwanted shortcuts getting + // first element (in case sample_indices returns an iterator). 
+ for (slot, sample) in result.iter_mut().zip( + x.choose_multiple(&mut rng, $amount)) { + *slot = *sample; + } + result[$amount-1] + }) } - buf - }) + } } +seq_slice_choose_multiple!(seq_slice_choose_multiple_1_of_1000, 1, 1000); +seq_slice_choose_multiple!(seq_slice_choose_multiple_950_of_1000, 950, 1000); +seq_slice_choose_multiple!(seq_slice_choose_multiple_10_of_100, 10, 100); +seq_slice_choose_multiple!(seq_slice_choose_multiple_90_of_100, 90, 100); + #[bench] fn seq_iter_choose_from_100(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); @@ -78,17 +82,21 @@ fn seq_iter_choose_multiple_fill_10_of_100(b: &mut Bencher) { } macro_rules! sample_indices { - ($name:ident, $amount:expr, $length:expr) => { + ($name:ident, $fn:ident, $amount:expr, $length:expr) => { #[bench] fn $name(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); b.iter(|| { - sample_indices(&mut rng, $length, $amount) + $fn(&mut rng, $length, $amount) }) } } } -sample_indices!(seq_sample_indices_10_of_1k, 10, 1000); -sample_indices!(seq_sample_indices_50_of_1k, 50, 1000); -sample_indices!(seq_sample_indices_100_of_1k, 100, 1000); +sample_indices!(misc_sample_indices_1_of_1k, sample_indices, 1, 1000); +sample_indices!(misc_sample_indices_10_of_1k, sample_indices, 10, 1000); +sample_indices!(misc_sample_indices_100_of_1k, sample_indices, 100, 1000); +sample_indices!(misc_sample_indices_100_of_1M, sample_indices, 100, 1000_000); +sample_indices!(misc_sample_indices_100_of_1G, sample_indices, 100, 1000_000_000); +sample_indices!(misc_sample_indices_400_of_1G, sample_indices, 400, 1000_000_000); +sample_indices!(misc_sample_indices_600_of_1G, sample_indices, 600, 1000_000_000); From 28d4949006a0960b41d223a144e9f84cd7d01f53 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Thu, 7 Jun 2018 12:43:34 +0100 Subject: [PATCH 02/14] sample_indices: refactor; switch _inplaces to u32 only The sample_indices_inplace algorithm is inappropriate 
for large numbers --- src/seq.rs | 95 +++++++++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index e030712b3d1..70471e1be32 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -522,76 +522,78 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize // and a trade off could probably be made between memory/cpu, since hashmap operations // are slower than array index swapping. if amount >= length / 20 { - sample_indices_inplace(rng, length, amount) + sample_indices_inplace(rng, length as u32, amount as u32) + .into_iter().map(|x| x as usize).collect() } else { sample_indices_cache(rng, length, amount) } } -/// Sample an amount of indices using an inplace partial fisher yates method. +/// Randomly sample exactly `amount` indices from `0..length`, using an inplace +/// partial Fisher-Yates method. /// /// This allocates the entire `length` of indices and randomizes only the first `amount`. /// It then truncates to `amount` and returns. +/// +/// This method is not appropriate for large `length` and potentially uses a lot +/// of memory; because of this we only implement for `u32` index (which improves +/// performance in all cases). /// -/// This is better than using a `HashMap` "cache" when `amount >= length / 2` -/// since it does not require allocating an extra cache and is much faster. +/// This is likely the fastest for small lengths since it avoids the need for +/// allocations. Set-up is `O(length)` time and memory and shuffling is +/// `O(amount)` time. 
#[cfg(feature = "alloc")] -fn sample_indices_inplace<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize> +fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) + -> Vec<u32> where R: Rng + ?Sized, { debug_assert!(amount <= length); - let mut indices: Vec<usize> = Vec::with_capacity(length); + let mut indices: Vec<u32> = Vec::with_capacity(length as usize); indices.extend(0..length); for i in 0..amount { - let j: usize = rng.gen_range(i, length); - indices.swap(i, j); + let j: u32 = rng.gen_range(i, length); + indices.swap(i as usize, j as usize); } - indices.truncate(amount); - debug_assert_eq!(indices.len(), amount); + indices.truncate(amount as usize); + debug_assert_eq!(indices.len(), amount as usize); indices } - -/// This method performs a partial fisher-yates on a range of indices using a -/// `HashMap` as a cache to record potential collisions. +/// Randomly sample exactly `amount` indices from `0..length`, using a +/// dynamically-cached partial Fisher-Yates method. /// -/// The cache avoids allocating the entire `length` of values. This is especially useful when -/// `amount <<< length`, i.e. select 3 non-repeating from `1_000_000` +/// The cache avoids allocating the entire `length` of values. This is +/// especially useful when `amount <<< length`; e.g. selecting 3 non-repeating +/// values from `1_000_000`. The algorithm is `O(amount)` time and memory, +/// but due to overheads will often be slower than other approaches. 
#[cfg(feature = "alloc")] -fn sample_indices_cache<R>( - rng: &mut R, - length: usize, - amount: usize, -) -> Vec<usize> +fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) + -> Vec<usize> where R: Rng + ?Sized, { debug_assert!(amount <= length); #[cfg(feature="std")] let mut cache = HashMap::with_capacity(amount); #[cfg(not(feature="std"))] let mut cache = BTreeMap::new(); - let mut out = Vec::with_capacity(amount); + let mut indices = Vec::with_capacity(amount); for i in 0..amount { let j: usize = rng.gen_range(i, length); - // equiv: let tmp = slice[i]; - let tmp = match cache.get(&i) { - Some(e) => *e, + // get the current values at i and j ... + let x_i = match cache.get(&i) { + Some(x) => *x, None => i, }; - - // equiv: slice[i] = slice[j]; - let x = match cache.get(&j) { + let x_j = match cache.get(&j) { Some(x) => *x, None => j, }; - // equiv: slice[j] = tmp; - cache.insert(j, tmp); - - // note that in the inplace version, slice[i] is automatically "returned" value - out.push(x); + // ... and swap them + cache.insert(j, x_i); + indices.push(x_j); // push at position i } - debug_assert_eq!(out.len(), amount); - out + debug_assert_eq!(indices.len(), amount); + indices } #[cfg(test)] @@ -752,14 +754,19 @@ mod test { let v = sample_slice(&mut r, &[42, 133], 2); assert!(&v[..] == [42, 133] || v[..] 
== [133, 42]); - assert_eq!(&sample_indices_inplace(&mut r, 0, 0)[..], [0usize; 0]); - assert_eq!(&sample_indices_inplace(&mut r, 1, 0)[..], [0usize; 0]); + assert_eq!(&sample_indices_inplace(&mut r, 0, 0)[..], [0; 0]); + assert_eq!(&sample_indices_inplace(&mut r, 1, 0)[..], [0; 0]); assert_eq!(&sample_indices_inplace(&mut r, 1, 1)[..], [0]); - assert_eq!(&sample_indices_cache(&mut r, 0, 0)[..], [0usize; 0]); - assert_eq!(&sample_indices_cache(&mut r, 1, 0)[..], [0usize; 0]); + assert_eq!(&sample_indices_cache(&mut r, 0, 0)[..], [0; 0]); + assert_eq!(&sample_indices_cache(&mut r, 1, 0)[..], [0; 0]); assert_eq!(&sample_indices_cache(&mut r, 1, 1)[..], [0]); + // These algorithms should be fast with big numbers. Test average. + let sum = sample_indices_cache(&mut r, 1 << 50, 10) + .iter().fold(0, |a, b| a + b); + assert!(1 << 50 < sum && sum < (1 << 50) * 25); + // Make sure lucky 777's aren't lucky let slice = &[42, 777]; let mut num_42 = 0; @@ -783,27 +790,19 @@ mod test { fn test_sample_slice() { let xor_rng = XorShiftRng::from_seed; - let max_range = 100; let mut r = ::test::rng(403); - for length in 1usize..max_range { + for n in 1usize..20 { + let length = 5*n - 4; // 1, 6, ... 
let amount = r.gen_range(0, length); let mut seed = [0u8; 16]; r.fill(&mut seed); - // assert that the two index methods give exactly the same result - let inplace = sample_indices_inplace( - &mut xor_rng(seed), length, amount); - let cache = sample_indices_cache( - &mut xor_rng(seed), length, amount); - assert_eq!(inplace, cache); - // assert the basics work let regular = sample_indices( &mut xor_rng(seed), length, amount); assert_eq!(regular.len(), amount); assert!(regular.iter().all(|e| *e < length)); - assert_eq!(regular, inplace); // also test that sampling the slice works let vec: Vec<usize> = (0..length).collect(); From 949833ddd6b023f61d6f6a64d4b630d24b3792ba Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Fri, 29 Jun 2018 11:02:58 +0100 Subject: [PATCH 03/14] sample_indices: add Floyd's algorithm and update selection logic --- src/seq.rs | 126 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 105 insertions(+), 21 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 70471e1be32..47e5171f320 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -495,12 +495,16 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> /// /// The values are non-repeating and in random order. /// -/// This implementation uses `O(amount)` time and memory. +/// This method is used internally by the slice sampling methods, but it can +/// sometimes be useful to have the indices themselves so this is provided as +/// an alternative. /// -/// This method is used internally by the slice sampling methods, but it can sometimes be useful to -/// have the indices themselves so this is provided as an alternative. +/// The implementation used is not specified; we automatically select the +/// fastest available implementation. Roughly speaking, complexity is +/// `O(amount)` if `amount` is small relative to `length`, otherwise `O(length)`. 
/// -/// Panics if `amount > length` +/// Panics if `amount > length`; may panic with extremely large `amount` or +/// `length` (when `36*length` or `2720*amount` overflows `usize`). #[cfg(feature = "alloc")] pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize> where R: Rng + ?Sized, @@ -508,29 +512,58 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize if amount > length { panic!("`amount` must be less than or equal to `slice.len()`"); } - - // We are going to have to allocate at least `amount` for the output no matter what. However, - // if we use the `cached` version we will have to allocate `amount` as a HashMap as well since - // it inserts an element for every loop. - // - // Therefore, if `amount >= length / 2` then inplace will be both faster and use less memory. - // In fact, benchmarks show the inplace version is faster for length up to about 20 times - // faster than amount. - // - // TODO: there is probably even more fine-tuning that can be done here since - // `HashMap::with_capacity(amount)` probably allocates more than `amount` in practice, - // and a trade off could probably be made between memory/cpu, since hashmap operations - // are slower than array index swapping. - if amount >= length / 20 { - sample_indices_inplace(rng, length as u32, amount as u32) - .into_iter().map(|x| x as usize).collect() + + // Choice of algorithm here depends on both length and amount. 
See: + // https://github.com/rust-lang-nursery/rand/pull/479 + + if amount < 517 { + const C: [[usize; 2]; 2] = [[1, 36], [200, 440]]; + let j = if length < 500_000 { 0 } else { 1 }; + let m4 = 4 * amount; + if C[0][j] * length < (C[1][j] + m4) * amount { + sample_indices_inplace(rng, length as u32, amount as u32) + .into_iter() + .map(|x| x as usize) + .collect() + } else { + sample_indices_floyd(rng, length, amount) + } } else { - sample_indices_cache(rng, length, amount) + const C: [[usize; 2]; 2] = [[1, 36], [62*40, 68*40]]; + let j = if length < 500_000 { 0 } else { 1 }; + if C[0][j] * length < C[1][j] * amount { + sample_indices_inplace(rng, length as u32, amount as u32) + .into_iter() + .map(|x| x as usize) + .collect() + } else { + sample_indices_cache(rng, length, amount) + } + } +} + +/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's +/// combination algorithm. +/// +/// This implementation uses `O(amount)` memory and `O(amount^2)` time. +#[cfg(feature = "alloc")] +fn sample_indices_floyd<R>(rng: &mut R, length: usize, amount: usize) + -> Vec<usize> + where R: Rng + ?Sized, +{ + debug_assert!(amount <= length); + let mut indices = Vec::with_capacity(amount); + for j in length - amount .. length { + let t = rng.gen_range(0, j + 1); + let t = if indices.contains(&t) { j } else { t }; + indices.push( t ); } + indices } /// Randomly sample exactly `amount` indices from `0..length`, using an inplace /// partial Fisher-Yates method. +/// Sample an amount of indices using an inplace partial fisher yates method. /// /// This allocates the entire `length` of indices and randomizes only the first `amount`. /// It then truncates to `amount` and returns. 
@@ -762,11 +795,19 @@ mod test { assert_eq!(&sample_indices_cache(&mut r, 1, 0)[..], [0; 0]); assert_eq!(&sample_indices_cache(&mut r, 1, 1)[..], [0]); + assert_eq!(&sample_indices_floyd(&mut r, 0, 0)[..], [0; 0]); + assert_eq!(&sample_indices_floyd(&mut r, 1, 0)[..], [0; 0]); + assert_eq!(&sample_indices_floyd(&mut r, 1, 1)[..], [0]); + // These algorithms should be fast with big numbers. Test average. let sum = sample_indices_cache(&mut r, 1 << 50, 10) .iter().fold(0, |a, b| a + b); assert!(1 << 50 < sum && sum < (1 << 50) * 25); + let sum = sample_indices_floyd(&mut r, 1 << 50, 10) + .iter().fold(0, |a, b| a + b); + assert!(1 << 50 < sum && sum < (1 << 50) * 25); + // Make sure lucky 777's aren't lucky let slice = &[42, 777]; let mut num_42 = 0; @@ -818,7 +859,50 @@ mod test { } } } + + #[test] + #[cfg(feature = "alloc")] + fn test_sample_alg() { + let xor_rng = XorShiftRng::from_seed; + let mut r = ::test::rng(403); + let mut seed = [0u8; 16]; + + // We can't test which algorithm is used directly, but each should + // produce a different sample with the same parameters. 
+ + // A small length and relatively large amount should use inplace + r.fill(&mut seed); + let (length, amount) = (100, 50); + let v1 = sample_indices(&mut xor_rng(seed), length, amount); + let v2 = sample_indices_inplace(&mut xor_rng(seed), + length as u32, amount as u32); + assert!(v1.iter().all(|e| *e < length)); + assert!(v1.iter().zip(v2.iter()).all(|(x,y)| *x == *y as usize)); + + // Test other algs do produce different results + let v3 = sample_indices_floyd(&mut xor_rng(seed), length, amount); + let v4 = sample_indices_cache(&mut xor_rng(seed), length, amount); + assert!(v1 != v3); + assert!(v1 != v4); + + // A large length and small amount should use Floyd + r.fill(&mut seed); + let (length, amount) = (1<<20, 50); + let v1 = sample_indices(&mut xor_rng(seed), length, amount); + let v2 = sample_indices_floyd(&mut xor_rng(seed), length, amount); + assert!(v1.iter().all(|e| *e < length)); + assert_eq!(v1, v2); + + // A large length and larger amount should use cache + r.fill(&mut seed); + let (length, amount) = (1<<20, 600); + let v1 = sample_indices(&mut xor_rng(seed), length, amount); + let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount); + assert!(v1.iter().all(|e| *e < length)); + assert_eq!(v1, v2); + } + #[test] #[cfg(feature = "alloc")] fn test_weighted() { From 2f5a03a7f07919b701e7f1126a2820cf15312136 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Sat, 2 Jun 2018 10:51:30 +0100 Subject: [PATCH 04/14] sample_indices: use u32 internally only (controversial) --- src/seq.rs | 123 +++++++++++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 47e5171f320..2fd2dd2b443 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -396,7 +396,7 @@ impl<I> IteratorRandom for I where I: Iterator + Sized {} pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { slice: &'a S, _phantom: ::core::marker::PhantomData<T>, - indices: vec::IntoIter<usize>, + indices: 
vec::IntoIter<u32>, } #[cfg(feature = "alloc")] @@ -405,7 +405,7 @@ impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> Iterator for SliceCho fn next(&mut self) -> Option<Self::Item> { // TODO: investigate using SliceIndex::get_unchecked when stable - self.indices.next().map(|i| &(*self.slice)[i]) + self.indices.next().map(|i| &(*self.slice)[i as usize]) } fn size_hint(&self) -> (usize, Option<usize>) { @@ -464,7 +464,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> let indices = sample_indices(rng, slice.len(), amount); let mut out = Vec::with_capacity(amount); - out.extend(indices.iter().map(|i| slice[*i].clone())); + out.extend(indices.iter().map(|i| slice[*i as usize].clone())); out } @@ -487,7 +487,7 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> let indices = sample_indices(rng, slice.len(), amount); let mut out = Vec::with_capacity(amount); - out.extend(indices.iter().map(|i| &slice[*i])); + out.extend(indices.iter().map(|i| &slice[*i as usize])); out } @@ -503,39 +503,44 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> /// fastest available implementation. Roughly speaking, complexity is /// `O(amount)` if `amount` is small relative to `length`, otherwise `O(length)`. /// -/// Panics if `amount > length`; may panic with extremely large `amount` or -/// `length` (when `36*length` or `2720*amount` overflows `usize`). +/// Note that we only support `u32` indices since this covers the vast majority +/// of uses, and performance is significantly better than with `u64`. +/// +/// If an allocation-free `no_std` function is required, it is suggested +/// to adapt the internal `sample_indices_floyd` implementation. +/// +/// Panics if `amount > length` or if `length` is not reprentable as a `u32`. 
#[cfg(feature = "alloc")] -pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize> +pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<u32> where R: Rng + ?Sized, { if amount > length { - panic!("`amount` must be less than or equal to `slice.len()`"); + panic!("`amount` of samples must be less than or equal to `length`"); } + if length > (::core::u32::MAX as usize) { + panic!("`length` is not representable as `u32`"); + } + let amount = amount as u32; + let length = length as u32; // Choice of algorithm here depends on both length and amount. See: // https://github.com/rust-lang-nursery/rand/pull/479 + // We do some calculations with u64 to avoid overflow. if amount < 517 { - const C: [[usize; 2]; 2] = [[1, 36], [200, 440]]; + const C: [[u64; 2]; 2] = [[1, 36], [200, 440]]; let j = if length < 500_000 { 0 } else { 1 }; - let m4 = 4 * amount; - if C[0][j] * length < (C[1][j] + m4) * amount { - sample_indices_inplace(rng, length as u32, amount as u32) - .into_iter() - .map(|x| x as usize) - .collect() + let m4 = 4 * amount as u64; + if C[0][j] * (length as u64) < (C[1][j] + m4) * amount as u64 { + sample_indices_inplace(rng, length, amount) } else { sample_indices_floyd(rng, length, amount) } } else { - const C: [[usize; 2]; 2] = [[1, 36], [62*40, 68*40]]; + const C: [[u64; 2]; 2] = [[1, 36], [62*40, 68*40]]; let j = if length < 500_000 { 0 } else { 1 }; - if C[0][j] * length < C[1][j] * amount { - sample_indices_inplace(rng, length as u32, amount as u32) - .into_iter() - .map(|x| x as usize) - .collect() + if C[0][j] * (length as u64) < C[1][j] * amount as u64 { + sample_indices_inplace(rng, length, amount) } else { sample_indices_cache(rng, length, amount) } @@ -547,12 +552,12 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize /// /// This implementation uses `O(amount)` memory and `O(amount^2)` time. 
#[cfg(feature = "alloc")] -fn sample_indices_floyd<R>(rng: &mut R, length: usize, amount: usize) - -> Vec<usize> +fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32) + -> Vec<u32> where R: Rng + ?Sized, { debug_assert!(amount <= length); - let mut indices = Vec::with_capacity(amount); + let mut indices = Vec::with_capacity(amount as usize); for j in length - amount .. length { let t = rng.gen_range(0, j + 1); let t = if indices.contains(&t) { j } else { t }; @@ -600,16 +605,16 @@ fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) /// values from `1_000_000`. The algorithm is `O(amount)` time and memory, /// but due to overheads will often be slower than other approaches. #[cfg(feature = "alloc")] -fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) - -> Vec<usize> +fn sample_indices_cache<R>(rng: &mut R, length: u32, amount: u32) + -> Vec<u32> where R: Rng + ?Sized, { debug_assert!(amount <= length); - #[cfg(feature="std")] let mut cache = HashMap::with_capacity(amount); + #[cfg(feature="std")] let mut cache = HashMap::with_capacity(amount as usize); #[cfg(not(feature="std"))] let mut cache = BTreeMap::new(); - let mut indices = Vec::with_capacity(amount); + let mut indices = Vec::with_capacity(amount as usize); for i in 0..amount { - let j: usize = rng.gen_range(i, length); + let j: u32 = rng.gen_range(i, length); // get the current values at i and j ... let x_i = match cache.get(&i) { @@ -625,7 +630,7 @@ fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) cache.insert(j, x_i); indices.push(x_j); // push at position i } - debug_assert_eq!(indices.len(), amount); + debug_assert_eq!(indices.len(), amount as usize); indices } @@ -800,13 +805,13 @@ mod test { assert_eq!(&sample_indices_floyd(&mut r, 1, 1)[..], [0]); // These algorithms should be fast with big numbers. Test average. 
- let sum = sample_indices_cache(&mut r, 1 << 50, 10) + let sum = sample_indices_cache(&mut r, 1 << 25, 10) .iter().fold(0, |a, b| a + b); - assert!(1 << 50 < sum && sum < (1 << 50) * 25); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); - let sum = sample_indices_floyd(&mut r, 1 << 50, 10) + let sum = sample_indices_floyd(&mut r, 1 << 25, 10) .iter().fold(0, |a, b| a + b); - assert!(1 << 50 < sum && sum < (1 << 50) * 25); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); // Make sure lucky 777's aren't lucky let slice = &[42, 777]; @@ -833,7 +838,7 @@ mod test { let mut r = ::test::rng(403); - for n in 1usize..20 { + for n in 1..20 { let length = 5*n - 4; // 1, 6, ... let amount = r.gen_range(0, length); let mut seed = [0u8; 16]; @@ -843,20 +848,16 @@ mod test { let regular = sample_indices( &mut xor_rng(seed), length, amount); assert_eq!(regular.len(), amount); - assert!(regular.iter().all(|e| *e < length)); + assert!(regular.iter().all(|e| *e < length as u32)); // also test that sampling the slice works - let vec: Vec<usize> = (0..length).collect(); - { - let result = sample_slice(&mut xor_rng(seed), &vec, amount); - assert_eq!(result, regular); - } + let vec: Vec<u32> = (0..(length as u32)).collect(); + let result = sample_slice(&mut xor_rng(seed), &vec, amount); + assert_eq!(result, regular); - { - let result = sample_slice_ref(&mut xor_rng(seed), &vec, amount); - let expected = regular.iter().map(|v| v).collect::<Vec<_>>(); - assert_eq!(result, expected); - } + let result = sample_slice_ref(&mut xor_rng(seed), &vec, amount); + let expected = regular.iter().map(|v| v).collect::<Vec<_>>(); + assert_eq!(result, expected); } } @@ -868,38 +869,38 @@ mod test { let mut r = ::test::rng(403); let mut seed = [0u8; 16]; - // We can't test which algorithm is used directly, but each should - // produce a different sample with the same parameters. + // We can't test which algorithm is used directly, but Floyd's alg + // should produce different results from the others. 
// A small length and relatively large amount should use inplace r.fill(&mut seed); - let (length, amount) = (100, 50); - let v1 = sample_indices(&mut xor_rng(seed), length, amount); - let v2 = sample_indices_inplace(&mut xor_rng(seed), - length as u32, amount as u32); + let (length, amount): (u32, u32) = (100, 50); + let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize); + let v2 = sample_indices_inplace(&mut xor_rng(seed), length, amount); assert!(v1.iter().all(|e| *e < length)); - assert!(v1.iter().zip(v2.iter()).all(|(x,y)| *x == *y as usize)); + assert_eq!(v1, v2); - // Test other algs do produce different results + // Test Floyd's alg does produce different results let v3 = sample_indices_floyd(&mut xor_rng(seed), length, amount); - let v4 = sample_indices_cache(&mut xor_rng(seed), length, amount); assert!(v1 != v3); - assert!(v1 != v4); + // However, the cache alg should produce the same results + let v4 = sample_indices_cache(&mut xor_rng(seed), length, amount); + assert_eq!(v1, v4); // A large length and small amount should use Floyd r.fill(&mut seed); - let (length, amount) = (1<<20, 50); - let v1 = sample_indices(&mut xor_rng(seed), length, amount); + let (length, amount): (u32, u32) = (1<<20, 50); + let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize); let v2 = sample_indices_floyd(&mut xor_rng(seed), length, amount); assert!(v1.iter().all(|e| *e < length)); assert_eq!(v1, v2); // A large length and larger amount should use cache r.fill(&mut seed); - let (length, amount) = (1<<20, 600); - let v1 = sample_indices(&mut xor_rng(seed), length, amount); + let (length, amount): (u32, u32) = (1<<20, 600); + let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize); let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount); - assert!(v1.iter().all(|e| *e < length)); + assert!(v1.iter().all(|e| *e < length as u32)); assert_eq!(v1, v2); } From d4da64e272e768fb92a950153a00d4d154703680 Mon 
Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Sat, 2 Jun 2018 14:40:02 +0100 Subject: [PATCH 05/14] sample_indices: add 'shuffled' parameter --- CHANGELOG.md | 5 +++ benches/seq.rs | 4 +-- src/seq.rs | 88 +++++++++++++++++++++++++++++++------------------- 3 files changed, 62 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 716f6639a20..856d3ac35e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ A [separate changelog is kept for rand_core](rand_core/CHANGELOG.md). You may also find the [Update Guide](UPDATING.md) useful. +## [0.6.0] - Unreleased + +### Sequences module +- Optimised and changed return type of the `sample_indices` function. (#479) + ## [0.5.4] - 2018-07-11 ### Platform support - Make `OsRng` work via WASM/stdweb for WebWorkers diff --git a/benches/seq.rs b/benches/seq.rs index d844ab7f599..a38ad1148f3 100644 --- a/benches/seq.rs +++ b/benches/seq.rs @@ -39,7 +39,7 @@ macro_rules! seq_slice_choose_multiple { // Collect full result to prevent unwanted shortcuts getting // first element (in case sample_indices returns an iterator). for (slot, sample) in result.iter_mut().zip( - x.choose_multiple(&mut rng, $amount)) { + x.choose_multiple(&mut rng, $amount, false)) { *slot = *sample; } result[$amount-1] @@ -87,7 +87,7 @@ macro_rules! sample_indices { fn $name(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); b.iter(|| { - $fn(&mut rng, $length, $amount) + $fn(&mut rng, $length, $amount, true) }) } } diff --git a/src/seq.rs b/src/seq.rs index 2fd2dd2b443..72f7e58953d 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -66,12 +66,13 @@ pub trait SliceRandom { /// In case this API is not sufficiently flexible, use `sample_indices` then /// apply the indices to the slice. /// - /// Although the elements are selected randomly, the order of returned - /// elements is neither stable nor fully random. 
If random ordering is - /// desired, either use `partial_shuffle` or use this method and shuffle - /// the result. If stable order is desired, use `sample_indices`, sort the - /// result, then apply to the slice. - /// + /// If `shuffled == true` then the sampled values will be fully shuffled; + /// otherwise the values may only partially shuffled, depending on the + /// algorithm used (i.e. biases may exist in the ordering of sampled + /// elements). Depending on the algorithm used internally, full shuffling + /// may add significant overhead for `amount` > 10 or so, but not more + /// than double the time and often much less. + /// /// Complexity is expected to be the same as `sample_indices`. /// /// # Example @@ -82,16 +83,16 @@ pub trait SliceRandom { /// let sample = "Hello, audience!".as_bytes(); /// /// // collect the results into a vector: - /// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3).cloned().collect(); + /// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3, true).cloned().collect(); /// /// // store in a buffer: /// let mut buf = [0u8; 5]; - /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { + /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len(), true).zip(buf.iter_mut()) { /// *slot = *b; /// } /// ``` #[cfg(feature = "alloc")] - fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item> + fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool) -> SliceChooseIter<Self, Self::Item> where R: Rng + ?Sized; /// Similar to [`choose`], where the likelihood of each outcome may be @@ -317,14 +318,15 @@ impl<T> SliceRandom for [T] { } #[cfg(feature = "alloc")] - fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item> + fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool) + -> SliceChooseIter<Self, Self::Item> where R: Rng + ?Sized { let amount = ::core::cmp::min(amount, self.len()); 
SliceChooseIter { slice: self, _phantom: Default::default(), - indices: sample_indices(rng, self.len(), amount).into_iter(), + indices: sample_indices(rng, self.len(), amount, shuffled).into_iter(), } } @@ -461,7 +463,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> where R: Rng + ?Sized, T: Clone { - let indices = sample_indices(rng, slice.len(), amount); + let indices = sample_indices(rng, slice.len(), amount, true); let mut out = Vec::with_capacity(amount); out.extend(indices.iter().map(|i| slice[*i as usize].clone())); @@ -484,24 +486,32 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> where R: Rng + ?Sized { - let indices = sample_indices(rng, slice.len(), amount); + let indices = sample_indices(rng, slice.len(), amount, true); let mut out = Vec::with_capacity(amount); out.extend(indices.iter().map(|i| &slice[*i as usize])); out } -/// Randomly sample exactly `amount` indices from `0..length`. +/// Randomly sample exactly `amount` distinct indices from `0..length`. /// -/// The values are non-repeating and in random order. +/// If `shuffled == true` then the sampled values will be fully shuffled; +/// otherwise the values may only partially shuffled, depending on the +/// algorithm used (i.e. biases may exist in the ordering of sampled elements). +/// Depending on the algorithm used internally, full shuffling may add +/// significant overhead for `amount` > 10 or so, but not more than double +/// the time and often much less. /// /// This method is used internally by the slice sampling methods, but it can /// sometimes be useful to have the indices themselves so this is provided as /// an alternative. /// /// The implementation used is not specified; we automatically select the -/// fastest available implementation. 
Roughly speaking, complexity is -/// `O(amount)` if `amount` is small relative to `length`, otherwise `O(length)`. +/// fastest available implementation for the `length` and `amount` parameters +/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking, +/// complexity is `O(amount)`, except that when `amount` is small, performance +/// is closer to `O(amount^2)`, and when `length` is close to `amount` then +/// `O(length)`. /// /// Note that we only support `u32` indices since this covers the vast majority /// of uses, and performance is significantly better than with `u64`. @@ -511,7 +521,8 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> /// /// Panics if `amount > length` or if `length` is not reprentable as a `u32`. #[cfg(feature = "alloc")] -pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<u32> +pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, + shuffled: bool) -> Vec<u32> where R: Rng + ?Sized, { if amount > length { @@ -534,7 +545,7 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<u32> if C[0][j] * (length as u64) < (C[1][j] + m4) * amount as u64 { sample_indices_inplace(rng, length, amount) } else { - sample_indices_floyd(rng, length, amount) + sample_indices_floyd(rng, length, amount, shuffled) } } else { const C: [[u64; 2]; 2] = [[1, 36], [62*40, 68*40]]; @@ -549,19 +560,30 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<u32> /// Randomly sample exactly `amount` indices from `0..length`, using Floyd's /// combination algorithm. +/// +/// If `shuffled == false`, the values are only partially shuffled (i.e. biases +/// exist in the ordering of sampled elements). If `shuffled == true`, the +/// values are fully shuffled. /// /// This implementation uses `O(amount)` memory and `O(amount^2)` time. 
#[cfg(feature = "alloc")] -fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32) - -> Vec<u32> +fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Vec<u32> where R: Rng + ?Sized, { debug_assert!(amount <= length); let mut indices = Vec::with_capacity(amount as usize); for j in length - amount .. length { let t = rng.gen_range(0, j + 1); - let t = if indices.contains(&t) { j } else { t }; - indices.push( t ); + if indices.contains(&t) { + indices.push(j) + } else { + indices.push(t) + }; + } + if shuffled { + // Note that there is a variant of Floyd's algorithm with native full + // shuffling, but it is slow because it requires arbitrary insertions. + indices.shuffle(rng); } indices } @@ -800,16 +822,16 @@ mod test { assert_eq!(&sample_indices_cache(&mut r, 1, 0)[..], [0; 0]); assert_eq!(&sample_indices_cache(&mut r, 1, 1)[..], [0]); - assert_eq!(&sample_indices_floyd(&mut r, 0, 0)[..], [0; 0]); - assert_eq!(&sample_indices_floyd(&mut r, 1, 0)[..], [0; 0]); - assert_eq!(&sample_indices_floyd(&mut r, 1, 1)[..], [0]); + assert_eq!(&sample_indices_floyd(&mut r, 0, 0, false)[..], [0; 0]); + assert_eq!(&sample_indices_floyd(&mut r, 1, 0, false)[..], [0; 0]); + assert_eq!(&sample_indices_floyd(&mut r, 1, 1, false)[..], [0]); // These algorithms should be fast with big numbers. Test average. 
let sum = sample_indices_cache(&mut r, 1 << 25, 10) .iter().fold(0, |a, b| a + b); assert!(1 << 25 < sum && sum < (1 << 25) * 25); - let sum = sample_indices_floyd(&mut r, 1 << 25, 10) + let sum = sample_indices_floyd(&mut r, 1 << 25, 10, false) .iter().fold(0, |a, b| a + b); assert!(1 << 25 < sum && sum < (1 << 25) * 25); @@ -846,7 +868,7 @@ mod test { // assert the basics work let regular = sample_indices( - &mut xor_rng(seed), length, amount); + &mut xor_rng(seed), length, amount, true); assert_eq!(regular.len(), amount); assert!(regular.iter().all(|e| *e < length as u32)); @@ -875,13 +897,13 @@ mod test { // A small length and relatively large amount should use inplace r.fill(&mut seed); let (length, amount): (u32, u32) = (100, 50); - let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize); + let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize, true); let v2 = sample_indices_inplace(&mut xor_rng(seed), length, amount); assert!(v1.iter().all(|e| *e < length)); assert_eq!(v1, v2); // Test Floyd's alg does produce different results - let v3 = sample_indices_floyd(&mut xor_rng(seed), length, amount); + let v3 = sample_indices_floyd(&mut xor_rng(seed), length, amount, true); assert!(v1 != v3); // However, the cache alg should produce the same results let v4 = sample_indices_cache(&mut xor_rng(seed), length, amount); @@ -890,15 +912,15 @@ mod test { // A large length and small amount should use Floyd r.fill(&mut seed); let (length, amount): (u32, u32) = (1<<20, 50); - let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize); - let v2 = sample_indices_floyd(&mut xor_rng(seed), length, amount); + let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize, true); + let v2 = sample_indices_floyd(&mut xor_rng(seed), length, amount, true); assert!(v1.iter().all(|e| *e < length)); assert_eq!(v1, v2); // A large length and larger amount should use cache r.fill(&mut seed); let (length, 
amount): (u32, u32) = (1<<20, 600); - let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize); + let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize, true); let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount); assert!(v1.iter().all(|e| *e < length as u32)); assert_eq!(v1, v2); From 98889f21bf3cc30b1e92f9d13cdd772356423d76 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Sat, 2 Jun 2018 15:22:47 +0100 Subject: [PATCH 06/14] sample_indices: update selection heuristics Update with new benchmark data from `u32` impls of Floyd's and cached algorithms (inplace alg already used benchmarks from `u32` impl). Update Floyd's with a balanced model adequate for both shuffled and unshuffled versions. --- src/seq.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 72f7e58953d..62b6824f548 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -538,17 +538,17 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, // https://github.com/rust-lang-nursery/rand/pull/479 // We do some calculations with u64 to avoid overflow. 
- if amount < 517 { - const C: [[u64; 2]; 2] = [[1, 36], [200, 440]]; + if amount < 442 { + const C: [[u64; 2]; 2] = [[5, 45], [50, 350]]; let j = if length < 500_000 { 0 } else { 1 }; - let m4 = 4 * amount as u64; + let m4 = 6 * amount as u64; if C[0][j] * (length as u64) < (C[1][j] + m4) * amount as u64 { sample_indices_inplace(rng, length, amount) } else { sample_indices_floyd(rng, length, amount, shuffled) } } else { - const C: [[u64; 2]; 2] = [[1, 36], [62*40, 68*40]]; + const C: [[u64; 2]; 2] = [[1, 9], [590, 600]]; let j = if length < 500_000 { 0 } else { 1 }; if C[0][j] * (length as u64) < C[1][j] * amount as u64 { sample_indices_inplace(rng, length, amount) From c95f8969d3525a0acb82929cda26ba162439fc80 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Wed, 13 Jun 2018 12:22:17 +0100 Subject: [PATCH 07/14] sample_indices: use f32 in heuristics and add short-cut Motivation: don't have to worry about overflow whichever index type is used. Appears to slightly improve some benchmarks, no effect on others. --- src/seq.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 62b6824f548..71867ee6b22 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -536,21 +536,23 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, // Choice of algorithm here depends on both length and amount. See: // https://github.com/rust-lang-nursery/rand/pull/479 - // We do some calculations with u64 to avoid overflow. + // We do some calculations with f32. Accuracy is not very important.
if amount < 442 { - const C: [[u64; 2]; 2] = [[5, 45], [50, 350]]; + const C: [[f32; 2]; 2] = [[1.2, 6.0/45.0], [10.0, 70.0/9.0]]; let j = if length < 500_000 { 0 } else { 1 }; - let m4 = 6 * amount as u64; - if C[0][j] * (length as u64) < (C[1][j] + m4) * amount as u64 { + let amount_fp = amount as f32; + let m4 = C[0][j] * amount_fp; + // Short-cut: when amount < 12, floyd's is always faster + if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp { sample_indices_inplace(rng, length, amount) } else { sample_indices_floyd(rng, length, amount, shuffled) } } else { - const C: [[u64; 2]; 2] = [[1, 9], [590, 600]]; + const C: [f32; 2] = [590.0, 600.0/9.0]; let j = if length < 500_000 { 0 } else { 1 }; - if C[0][j] * (length as u64) < C[1][j] * amount as u64 { + if (length as f32) < C[j] * (amount as f32) { sample_indices_inplace(rng, length, amount) } else { sample_indices_cache(rng, length, amount) From 59d0823a78ec431c05be98fbbf5688861ad45580 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Thu, 28 Jun 2018 14:55:05 +0100 Subject: [PATCH 08/14] sample_indices: abstract over return type This is to allow use of u32 or usize internally --- src/lib.rs | 2 +- src/seq.rs | 206 +++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 160 insertions(+), 48 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f07a68c495a..197fc2546fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -234,7 +234,7 @@ #![cfg_attr(feature = "wasm-bindgen", feature(wasm_import_module))] #[cfg(feature = "std")] extern crate core; -#[cfg(all(feature = "alloc", not(feature="std")))] extern crate alloc; +#[cfg(all(feature = "alloc", not(feature="std")))] #[macro_use] extern crate alloc; #[cfg(feature="simd_support")] extern crate packed_simd; diff --git a/src/seq.rs b/src/seq.rs index 71867ee6b22..0e598f2a8dc 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -13,6 +13,7 @@ //! 
TODO: module doc #[cfg(feature="alloc")] use core::ops::Index; +#[cfg(feature="alloc")] use core::slice; #[cfg(feature="std")] use std::vec; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec; @@ -326,7 +327,7 @@ impl<T> SliceRandom for [T] { SliceChooseIter { slice: self, _phantom: Default::default(), - indices: sample_indices(rng, self.len(), amount, shuffled).into_iter(), + indices: sample_indices(rng, self.len(), amount, shuffled).into_iter_usize(), } } @@ -398,7 +399,7 @@ impl<I> IteratorRandom for I where I: Iterator + Sized {} pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { slice: &'a S, _phantom: ::core::marker::PhantomData<T>, - indices: vec::IntoIter<u32>, + indices: IndicesIntoIter, } #[cfg(feature = "alloc")] @@ -466,7 +467,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> let indices = sample_indices(rng, slice.len(), amount, true); let mut out = Vec::with_capacity(amount); - out.extend(indices.iter().map(|i| slice[*i as usize].clone())); + out.extend(indices.iter_usize().map(|i| slice[i].clone())); out } @@ -489,10 +490,126 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> let indices = sample_indices(rng, slice.len(), amount, true); let mut out = Vec::with_capacity(amount); - out.extend(indices.iter().map(|i| &slice[*i as usize])); + out.extend(indices.iter_usize().map(|i| &slice[i])); out } +/// Return type of `sample_indices`. +#[cfg(feature = "alloc")] +#[derive(Clone, Debug, PartialEq)] +pub enum Indices { + /// Representation: a vector over `u32` values + U32(Vec<u32>) +} + +#[cfg(feature = "alloc")] +impl Indices { + /// Returns the number of indices + pub fn len(&self) -> usize { + match self { + &Indices::U32(ref v) => v.len(), + } + } + + /// Return result as a `Vec<usize>`. Conversion may or may not be trivial. 
+ pub fn into_vec_usize(self) -> Vec<usize> { + match self { + Indices::U32(v) => v.into_iter().map(|i| i as usize).collect(), + } + } + + /// Iterate over the indices as a sequence of `usize` values + pub fn iter_usize<'a>(&'a self) -> IndicesIter<'a> { + match self { + &Indices::U32(ref v) => IndicesIter::U32(v.iter()), + } + } + + /// Convert into an iterator over the indices as a sequence of `usize` values + pub fn into_iter_usize(self) -> IndicesIntoIter { + match self { + Indices::U32(v) => IndicesIntoIter::U32(v.into_iter()), + } + } +} + +#[cfg(feature = "alloc")] +impl From<Vec<u32>> for Indices { + fn from(v: Vec<u32>) -> Self { + Indices::U32(v) + } +} + +/// Return type of `Indices::iter_usize`. +#[cfg(feature = "alloc")] +#[derive(Debug)] +pub enum IndicesIter<'a> { + #[doc(hidden)] U32(slice::Iter<'a, u32>), +} + +#[cfg(feature = "alloc")] +impl<'a> Iterator for IndicesIter<'a> { + type Item = usize; + fn next(&mut self) -> Option<usize> { + use self::IndicesIter::*; + match self { + &mut U32(ref mut iter) => iter.next().map(|i| *i as usize), + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + match self { + &IndicesIter::U32(ref v) => v.size_hint(), + } + } +} + +#[cfg(feature = "alloc")] +impl<'a> ExactSizeIterator for IndicesIter<'a> { + fn len(&self) -> usize { + match self { + &IndicesIter::U32(ref v) => v.len(), + } + } +} + +/// Return type of `Indices::into_iter_usize`. 
+#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +pub enum IndicesIntoIter { + #[doc(hidden)] U32(vec::IntoIter<u32>), +} + +#[cfg(feature = "alloc")] +impl Iterator for IndicesIntoIter { + type Item = usize; + + fn next(&mut self) -> Option<Self::Item> { + use self::IndicesIntoIter::*; + match self { + &mut U32(ref mut v) => v.next().map(|i| i as usize), + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + use self::IndicesIntoIter::*; + match self { + &U32(ref v) => v.size_hint(), + } + } +} + +#[cfg(feature = "alloc")] +impl ExactSizeIterator for IndicesIntoIter { + fn len(&self) -> usize { + use self::IndicesIntoIter::*; + match self { + &U32(ref v) => v.len(), + } + } +} + + /// Randomly sample exactly `amount` distinct indices from `0..length`. /// /// If `shuffled == true` then the sampled values will be fully shuffled; @@ -522,7 +639,7 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> /// Panics if `amount > length` or if `length` is not reprentable as a `u32`. #[cfg(feature = "alloc")] pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, - shuffled: bool) -> Vec<u32> + shuffled: bool) -> Indices where R: Rng + ?Sized, { if amount > length { @@ -569,7 +686,7 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, /// /// This implementation uses `O(amount)` memory and `O(amount^2)` time. #[cfg(feature = "alloc")] -fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Vec<u32> +fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Indices where R: Rng + ?Sized, { debug_assert!(amount <= length); @@ -587,7 +704,7 @@ fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool // shuffling, but it is slow because it requires arbitrary insertions. 
indices.shuffle(rng); } - indices + Indices::from(indices) } /// Randomly sample exactly `amount` indices from `0..length`, using an inplace @@ -605,8 +722,7 @@ fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool /// allocations. Set-up is `O(length)` time and memory and shuffling is /// `O(amount)` time. #[cfg(feature = "alloc")] -fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) - -> Vec<u32> +fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> Indices where R: Rng + ?Sized, { debug_assert!(amount <= length); @@ -618,7 +734,7 @@ fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) } indices.truncate(amount as usize); debug_assert_eq!(indices.len(), amount as usize); - indices + Indices::from(indices) } /// Randomly sample exactly `amount` indices from `0..length`, using a @@ -629,8 +745,7 @@ fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) /// values from `1_000_000`. The algorithm is `O(amount)` time and memory, /// but due to overheads will often be slower than other approaches. #[cfg(feature = "alloc")] -fn sample_indices_cache<R>(rng: &mut R, length: u32, amount: u32) - -> Vec<u32> +fn sample_indices_cache<R>(rng: &mut R, length: u32, amount: u32) -> Indices where R: Rng + ?Sized, { debug_assert!(amount <= length); @@ -655,7 +770,7 @@ fn sample_indices_cache<R>(rng: &mut R, length: u32, amount: u32) indices.push(x_j); // push at position i } debug_assert_eq!(indices.len(), amount as usize); - indices + Indices::from(indices) } #[cfg(test)] @@ -816,25 +931,25 @@ mod test { let v = sample_slice(&mut r, &[42, 133], 2); assert!(&v[..] == [42, 133] || v[..] 
== [133, 42]); - assert_eq!(&sample_indices_inplace(&mut r, 0, 0)[..], [0; 0]); - assert_eq!(&sample_indices_inplace(&mut r, 1, 0)[..], [0; 0]); - assert_eq!(&sample_indices_inplace(&mut r, 1, 1)[..], [0]); + assert_eq!(sample_indices_inplace(&mut r, 0, 0).len(), 0); + assert_eq!(sample_indices_inplace(&mut r, 1, 0).len(), 0); + assert_eq!(sample_indices_inplace(&mut r, 1, 1).into_vec_usize(), vec![0]); - assert_eq!(&sample_indices_cache(&mut r, 0, 0)[..], [0; 0]); - assert_eq!(&sample_indices_cache(&mut r, 1, 0)[..], [0; 0]); - assert_eq!(&sample_indices_cache(&mut r, 1, 1)[..], [0]); + assert_eq!(sample_indices_cache(&mut r, 0, 0).len(), 0); + assert_eq!(sample_indices_cache(&mut r, 1, 0).len(), 0); + assert_eq!(sample_indices_cache(&mut r, 1, 1).into_vec_usize(), vec![0]); - assert_eq!(&sample_indices_floyd(&mut r, 0, 0, false)[..], [0; 0]); - assert_eq!(&sample_indices_floyd(&mut r, 1, 0, false)[..], [0; 0]); - assert_eq!(&sample_indices_floyd(&mut r, 1, 1, false)[..], [0]); + assert_eq!(sample_indices_floyd(&mut r, 0, 0, false).len(), 0); + assert_eq!(sample_indices_floyd(&mut r, 1, 0, false).len(), 0); + assert_eq!(sample_indices_floyd(&mut r, 1, 1, false).into_vec_usize(), vec![0]); // These algorithms should be fast with big numbers. Test average. 
- let sum = sample_indices_cache(&mut r, 1 << 25, 10) - .iter().fold(0, |a, b| a + b); + let indices = sample_indices_cache(&mut r, 1 << 25, 10); + let sum: usize = indices.iter_usize().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); - let sum = sample_indices_floyd(&mut r, 1 << 25, 10, false) - .iter().fold(0, |a, b| a + b); + let indices = sample_indices_floyd(&mut r, 1 << 25, 10, false); + let sum: usize = indices.iter_usize().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); // Make sure lucky 777's aren't lucky @@ -872,16 +987,15 @@ mod test { let regular = sample_indices( &mut xor_rng(seed), length, amount, true); assert_eq!(regular.len(), amount); - assert!(regular.iter().all(|e| *e < length as u32)); + assert!(regular.iter_usize().all(|e| e < length)); // also test that sampling the slice works let vec: Vec<u32> = (0..(length as u32)).collect(); let result = sample_slice(&mut xor_rng(seed), &vec, amount); - assert_eq!(result, regular); + assert_eq!(result, regular.iter_usize().map(|i| i as u32).collect::<Vec<_>>()); let result = sample_slice_ref(&mut xor_rng(seed), &vec, amount); - let expected = regular.iter().map(|v| v).collect::<Vec<_>>(); - assert_eq!(result, expected); + assert!(result.iter().zip(regular.iter_usize()).all(|(i,j)| **i == j as u32)); } } @@ -894,37 +1008,35 @@ mod test { let mut seed = [0u8; 16]; // We can't test which algorithm is used directly, but Floyd's alg - // should produce different results from the others. + // should produce different results from the others. (Also, `inplace` + // and `cached` currently use different sizes thus produce different results.) 
// A small length and relatively large amount should use inplace r.fill(&mut seed); - let (length, amount): (u32, u32) = (100, 50); - let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize, true); - let v2 = sample_indices_inplace(&mut xor_rng(seed), length, amount); - assert!(v1.iter().all(|e| *e < length)); + let (length, amount): (usize, usize) = (100, 50); + let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); + let v2 = sample_indices_inplace(&mut xor_rng(seed), length as u32, amount as u32); + assert!(v1.iter_usize().all(|e| e < length)); assert_eq!(v1, v2); // Test Floyd's alg does produce different results - let v3 = sample_indices_floyd(&mut xor_rng(seed), length, amount, true); + let v3 = sample_indices_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); assert!(v1 != v3); - // However, the cache alg should produce the same results - let v4 = sample_indices_cache(&mut xor_rng(seed), length, amount); - assert_eq!(v1, v4); // A large length and small amount should use Floyd r.fill(&mut seed); - let (length, amount): (u32, u32) = (1<<20, 50); - let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize, true); - let v2 = sample_indices_floyd(&mut xor_rng(seed), length, amount, true); - assert!(v1.iter().all(|e| *e < length)); + let (length, amount): (usize, usize) = (1<<20, 50); + let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); + let v2 = sample_indices_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); + assert!(v1.iter_usize().all(|e| e < length)); assert_eq!(v1, v2); // A large length and larger amount should use cache r.fill(&mut seed); - let (length, amount): (u32, u32) = (1<<20, 600); - let v1 = sample_indices(&mut xor_rng(seed), length as usize, amount as usize, true); - let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount); - assert!(v1.iter().all(|e| *e < length as u32)); + let (length, amount): (usize, usize) = (1<<20, 600); + let v1 = 
sample_indices(&mut xor_rng(seed), length, amount, true); + let v2 = sample_indices_cache(&mut xor_rng(seed), length as u32, amount as u32); + assert!(v1.iter_usize().all(|e| e < length)); assert_eq!(v1, v2); } From 5fc1da8d8cb1a21e6c7327073868d659943ff353 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Thu, 28 Jun 2018 15:03:07 +0100 Subject: [PATCH 09/14] sample_indices: also support usize internally --- src/seq.rs | 65 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 0e598f2a8dc..78849b3b460 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -496,10 +496,12 @@ pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> /// Return type of `sample_indices`. #[cfg(feature = "alloc")] -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug)] pub enum Indices { /// Representation: a vector over `u32` values - U32(Vec<u32>) + U32(Vec<u32>), + /// Representation: a vector over `usize` values + USize(Vec<usize>), } #[cfg(feature = "alloc")] @@ -508,6 +510,7 @@ impl Indices { pub fn len(&self) -> usize { match self { &Indices::U32(ref v) => v.len(), + &Indices::USize(ref v) => v.len(), } } @@ -515,6 +518,7 @@ impl Indices { pub fn into_vec_usize(self) -> Vec<usize> { match self { Indices::U32(v) => v.into_iter().map(|i| i as usize).collect(), + Indices::USize(v) => v, } } @@ -522,6 +526,7 @@ impl Indices { pub fn iter_usize<'a>(&'a self) -> IndicesIter<'a> { match self { &Indices::U32(ref v) => IndicesIter::U32(v.iter()), + &Indices::USize(ref v) => IndicesIter::USize(v.iter()), } } @@ -529,6 +534,20 @@ impl Indices { pub fn into_iter_usize(self) -> IndicesIntoIter { match self { Indices::U32(v) => IndicesIntoIter::U32(v.into_iter()), + Indices::USize(v) => IndicesIntoIter::USize(v.into_iter()), + } + } +} + +#[cfg(feature = "alloc")] +impl PartialEq for Indices { + fn eq(&self, other: &Indices) -> bool { + use self::Indices::*; + match 
(self, other) { + (&U32(ref v1), &U32(ref v2)) => v1 == v2, + (&USize(ref v1), &USize(ref v2)) => v1 == v2, + (a @ _, b @ _) => (a.len() == b.len()) && + (a.iter_usize().zip(b.iter_usize()).all(|(x, y)| x == y)), } } } @@ -540,11 +559,19 @@ impl From<Vec<u32>> for Indices { } } +#[cfg(feature = "alloc")] +impl From<Vec<usize>> for Indices { + fn from(v: Vec<usize>) -> Self { + Indices::USize(v) + } +} + /// Return type of `Indices::iter_usize`. #[cfg(feature = "alloc")] #[derive(Debug)] pub enum IndicesIter<'a> { #[doc(hidden)] U32(slice::Iter<'a, u32>), + #[doc(hidden)] USize(slice::Iter<'a, usize>), } #[cfg(feature = "alloc")] @@ -554,12 +581,14 @@ impl<'a> Iterator for IndicesIter<'a> { use self::IndicesIter::*; match self { &mut U32(ref mut iter) => iter.next().map(|i| *i as usize), + &mut USize(ref mut iter) => iter.next().cloned(), } } fn size_hint(&self) -> (usize, Option<usize>) { match self { &IndicesIter::U32(ref v) => v.size_hint(), + &IndicesIter::USize(ref v) => v.size_hint(), } } } @@ -569,6 +598,7 @@ impl<'a> ExactSizeIterator for IndicesIter<'a> { fn len(&self) -> usize { match self { &IndicesIter::U32(ref v) => v.len(), + &IndicesIter::USize(ref v) => v.len(), } } } @@ -578,6 +608,7 @@ impl<'a> ExactSizeIterator for IndicesIter<'a> { #[derive(Clone, Debug)] pub enum IndicesIntoIter { #[doc(hidden)] U32(vec::IntoIter<u32>), + #[doc(hidden)] USize(vec::IntoIter<usize>), } #[cfg(feature = "alloc")] @@ -588,6 +619,7 @@ impl Iterator for IndicesIntoIter { use self::IndicesIntoIter::*; match self { &mut U32(ref mut v) => v.next().map(|i| i as usize), + &mut USize(ref mut v) => v.next(), } } @@ -595,6 +627,7 @@ impl Iterator for IndicesIntoIter { use self::IndicesIntoIter::*; match self { &U32(ref v) => v.size_hint(), + &USize(ref v) => v.size_hint(), } } } @@ -605,6 +638,7 @@ impl ExactSizeIterator for IndicesIntoIter { use self::IndicesIntoIter::*; match self { &U32(ref v) => v.len(), + &USize(ref v) => v.len(), } } } @@ -630,13 +664,14 @@ impl 
ExactSizeIterator for IndicesIntoIter { /// is closer to `O(amount^2)`, and when `length` is close to `amount` then /// `O(length)`. /// -/// Note that we only support `u32` indices since this covers the vast majority -/// of uses, and performance is significantly better than with `u64`. +/// Note that performance is significantly better over `u32` indices than over +/// `u64` indices. Because of this we hide the underlying type behind an +/// abstraction, `Indices`. /// /// If an allocation-free `no_std` function is required, it is suggested /// to adapt the internal `sample_indices_floyd` implementation. /// -/// Panics if `amount > length` or if `length` is not reprentable as a `u32`. +/// Panics if `amount > length`. #[cfg(feature = "alloc")] pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, shuffled: bool) -> Indices @@ -646,7 +681,9 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, panic!("`amount` of samples must be less than or equal to `length`"); } if length > (::core::u32::MAX as usize) { - panic!("`length` is not representable as `u32`"); + // We never want to use inplace here, but could use floyd's alg + // Lazy version: always use the cache alg. + return sample_indices_cache(rng, length, amount); } let amount = amount as u32; let length = length as u32; @@ -672,7 +709,9 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, if (length as f32) < C[j] * (amount as f32) { sample_indices_inplace(rng, length, amount) } else { - sample_indices_cache(rng, length, amount) + // note: could have a specific u32 impl, but I'm lazy and + // generics don't have usable conversions + sample_indices_cache(rng, length as usize, amount as usize) } } } @@ -745,15 +784,15 @@ fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> Indices /// values from `1_000_000`. The algorithm is `O(amount)` time and memory, /// but due to overheads will often be slower than other approaches. 
#[cfg(feature = "alloc")] -fn sample_indices_cache<R>(rng: &mut R, length: u32, amount: u32) -> Indices +fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) -> Indices where R: Rng + ?Sized, { debug_assert!(amount <= length); - #[cfg(feature="std")] let mut cache = HashMap::with_capacity(amount as usize); + #[cfg(feature="std")] let mut cache = HashMap::with_capacity(amount); #[cfg(not(feature="std"))] let mut cache = BTreeMap::new(); - let mut indices = Vec::with_capacity(amount as usize); + let mut indices = Vec::with_capacity(amount); for i in 0..amount { - let j: u32 = rng.gen_range(i, length); + let j: usize = rng.gen_range(i, length); // get the current values at i and j ... let x_i = match cache.get(&i) { @@ -769,7 +808,7 @@ fn sample_indices_cache<R>(rng: &mut R, length: u32, amount: u32) -> Indices cache.insert(j, x_i); indices.push(x_j); // push at position i } - debug_assert_eq!(indices.len(), amount as usize); + debug_assert_eq!(indices.len(), amount); Indices::from(indices) } @@ -1035,7 +1074,7 @@ mod test { r.fill(&mut seed); let (length, amount): (usize, usize) = (1<<20, 600); let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); - let v2 = sample_indices_cache(&mut xor_rng(seed), length as u32, amount as u32); + let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount); assert!(v1.iter_usize().all(|e| e < length)); assert_eq!(v1, v2); } From d624e840ba24ec5620cf57ccb2376e110923b538 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Mon, 2 Jul 2018 13:05:45 +0100 Subject: [PATCH 10/14] sample_indices_cache: use rejection sampling instead --- src/seq.rs | 57 ++++++++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 78849b3b460..e9648181564 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -19,12 +19,12 @@ #[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec; #[cfg(all(feature="alloc", 
not(feature="std")))] use alloc::vec::Vec; // BTreeMap is not as fast in tests, but better than nothing. -#[cfg(feature="std")] use std::collections::HashMap; -#[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeMap; +#[cfg(feature="std")] use std::collections::{HashSet}; +#[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet; -#[cfg(feature = "alloc")] use distributions::WeightedError; - -use super::Rng; +use Rng; +#[cfg(feature="alloc")] use distributions::{Distribution, Uniform}; +#[cfg(feature="alloc")] use distributions::WeightedError; #[cfg(feature="alloc")] use distributions::uniform::{SampleUniform, SampleBorrow}; /// Extension trait on slices, providing random mutation and sampling methods. @@ -408,7 +408,7 @@ impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> Iterator for SliceCho fn next(&mut self) -> Option<Self::Item> { // TODO: investigate using SliceIndex::get_unchecked when stable - self.indices.next().map(|i| &(*self.slice)[i as usize]) + self.indices.next().map(|i| &self.slice[i as usize]) } fn size_hint(&self) -> (usize, Option<usize>) { @@ -776,38 +776,29 @@ fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> Indices Indices::from(indices) } -/// Randomly sample exactly `amount` indices from `0..length`, using a -/// dynamically-cached partial Fisher-Yates method. -/// -/// The cache avoids allocating the entire `length` of values. This is -/// especially useful when `amount <<< length`; e.g. selecting 3 non-repeating -/// values from `1_000_000`. The algorithm is `O(amount)` time and memory, -/// but due to overheads will often be slower than other approaches. +/// Randomly sample exactly `amount` indices from `0..length`, using rejection +/// sampling. +/// +/// Since `amount <<< length` there is a low chance of a random sample in +/// `0..length` being a duplicate. We test for duplicates and resample where +/// necessary. 
The algorithm is `O(amount)` time and memory. #[cfg(feature = "alloc")] fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) -> Indices where R: Rng + ?Sized, { - debug_assert!(amount <= length); - #[cfg(feature="std")] let mut cache = HashMap::with_capacity(amount); - #[cfg(not(feature="std"))] let mut cache = BTreeMap::new(); + debug_assert!(amount < length); + #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount); + #[cfg(not(feature="std"))] let mut cache = BTreeSet::new(); + let distr = Uniform::new(0, length); let mut indices = Vec::with_capacity(amount); - for i in 0..amount { - let j: usize = rng.gen_range(i, length); - - // get the current values at i and j ... - let x_i = match cache.get(&i) { - Some(x) => *x, - None => i, - }; - let x_j = match cache.get(&j) { - Some(x) => *x, - None => j, - }; - - // ... and swap them - cache.insert(j, x_i); - indices.push(x_j); // push at position i + for _ in 0..amount { + let mut pos = distr.sample(rng); + while !cache.insert(pos) { + pos = distr.sample(rng); + } + indices.push(pos); } + debug_assert_eq!(indices.len(), amount); Indices::from(indices) } @@ -974,9 +965,7 @@ mod test { assert_eq!(sample_indices_inplace(&mut r, 1, 0).len(), 0); assert_eq!(sample_indices_inplace(&mut r, 1, 1).into_vec_usize(), vec![0]); - assert_eq!(sample_indices_cache(&mut r, 0, 0).len(), 0); assert_eq!(sample_indices_cache(&mut r, 1, 0).len(), 0); - assert_eq!(sample_indices_cache(&mut r, 1, 1).into_vec_usize(), vec![0]); assert_eq!(sample_indices_floyd(&mut r, 0, 0, false).len(), 0); assert_eq!(sample_indices_floyd(&mut r, 1, 0, false).len(), 0); From 91f0af25045fb0ca3b9bb003a91629d2c2643dcc Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Wed, 4 Jul 2018 17:50:30 +0100 Subject: [PATCH 11/14] =?UTF-8?q?sample=5Findices:=20rename=20Indices=20?= =?UTF-8?q?=E2=86=92=20IndexVec;=20some=20revisions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
--- src/seq.rs | 160 ++++++++++++++++++++++++++--------------------------- 1 file changed, 79 insertions(+), 81 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index e9648181564..3c48a2135d9 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -327,7 +327,7 @@ impl<T> SliceRandom for [T] { SliceChooseIter { slice: self, _phantom: Default::default(), - indices: sample_indices(rng, self.len(), amount, shuffled).into_iter_usize(), + indices: sample_indices(rng, self.len(), amount, shuffled).into_iter(), } } @@ -399,7 +399,7 @@ impl<I> IteratorRandom for I where I: Iterator + Sized {} pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { slice: &'a S, _phantom: ::core::marker::PhantomData<T>, - indices: IndicesIntoIter, + indices: IndexVecIntoIter, } #[cfg(feature = "alloc")] @@ -464,10 +464,10 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> where R: Rng + ?Sized, T: Clone { - let indices = sample_indices(rng, slice.len(), amount, true); + let indices = sample_indices(rng, slice.len(), amount, true).into_iter(); let mut out = Vec::with_capacity(amount); - out.extend(indices.iter_usize().map(|i| slice[i].clone())); + out.extend(indices.map(|i| slice[i].clone())); out } @@ -487,98 +487,111 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> where R: Rng + ?Sized { - let indices = sample_indices(rng, slice.len(), amount, true); + let indices = sample_indices(rng, slice.len(), amount, true).into_iter(); let mut out = Vec::with_capacity(amount); - out.extend(indices.iter_usize().map(|i| &slice[i])); + out.extend(indices.map(|i| &slice[i])); out } -/// Return type of `sample_indices`. +/// A vector of indices. +/// +/// Multiple internal representations are possible. 
#[cfg(feature = "alloc")] #[derive(Clone, Debug)] -pub enum Indices { - /// Representation: a vector over `u32` values - U32(Vec<u32>), - /// Representation: a vector over `usize` values - USize(Vec<usize>), +pub enum IndexVec { + #[doc(hidden)] U32(Vec<u32>), + #[doc(hidden)] USize(Vec<usize>), } #[cfg(feature = "alloc")] -impl Indices { +impl IndexVec { /// Returns the number of indices pub fn len(&self) -> usize { match self { - &Indices::U32(ref v) => v.len(), - &Indices::USize(ref v) => v.len(), + &IndexVec::U32(ref v) => v.len(), + &IndexVec::USize(ref v) => v.len(), + } + } + + /// Return the value at the given `index`. + /// + /// (Note: we cannot implement `std::ops::Index` because of lifetime + /// restrictions.) + pub fn index(&self, index: usize) -> usize { + match self { + &IndexVec::U32(ref v) => v[index] as usize, + &IndexVec::USize(ref v) => v[index], } } /// Return result as a `Vec<usize>`. Conversion may or may not be trivial. - pub fn into_vec_usize(self) -> Vec<usize> { + pub fn into_vec(self) -> Vec<usize> { match self { - Indices::U32(v) => v.into_iter().map(|i| i as usize).collect(), - Indices::USize(v) => v, + IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), + IndexVec::USize(v) => v, } } /// Iterate over the indices as a sequence of `usize` values - pub fn iter_usize<'a>(&'a self) -> IndicesIter<'a> { + pub fn iter<'a>(&'a self) -> IndexVecIter<'a> { match self { - &Indices::U32(ref v) => IndicesIter::U32(v.iter()), - &Indices::USize(ref v) => IndicesIter::USize(v.iter()), + &IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), + &IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), } } /// Convert into an iterator over the indices as a sequence of `usize` values - pub fn into_iter_usize(self) -> IndicesIntoIter { + pub fn into_iter(self) -> IndexVecIntoIter { match self { - Indices::U32(v) => IndicesIntoIter::U32(v.into_iter()), - Indices::USize(v) => IndicesIntoIter::USize(v.into_iter()), + IndexVec::U32(v) => 
IndexVecIntoIter::U32(v.into_iter()), + IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()), } } } #[cfg(feature = "alloc")] -impl PartialEq for Indices { - fn eq(&self, other: &Indices) -> bool { - use self::Indices::*; +impl PartialEq for IndexVec { + fn eq(&self, other: &IndexVec) -> bool { + use self::IndexVec::*; match (self, other) { (&U32(ref v1), &U32(ref v2)) => v1 == v2, (&USize(ref v1), &USize(ref v2)) => v1 == v2, - (a @ _, b @ _) => (a.len() == b.len()) && - (a.iter_usize().zip(b.iter_usize()).all(|(x, y)| x == y)), + (&U32(ref v1), &USize(ref v2)) => (v1.len() == v2.len()) + && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)), + (&USize(ref v1), &U32(ref v2)) => (v1.len() == v2.len()) + && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)), } } } #[cfg(feature = "alloc")] -impl From<Vec<u32>> for Indices { +impl From<Vec<u32>> for IndexVec { fn from(v: Vec<u32>) -> Self { - Indices::U32(v) + IndexVec::U32(v) } } #[cfg(feature = "alloc")] -impl From<Vec<usize>> for Indices { +impl From<Vec<usize>> for IndexVec { fn from(v: Vec<usize>) -> Self { - Indices::USize(v) + IndexVec::USize(v) } } -/// Return type of `Indices::iter_usize`. +/// Return type of `IndexVec::iter`. 
#[cfg(feature = "alloc")] #[derive(Debug)] -pub enum IndicesIter<'a> { +pub enum IndexVecIter<'a> { #[doc(hidden)] U32(slice::Iter<'a, u32>), #[doc(hidden)] USize(slice::Iter<'a, usize>), } #[cfg(feature = "alloc")] -impl<'a> Iterator for IndicesIter<'a> { +impl<'a> Iterator for IndexVecIter<'a> { type Item = usize; fn next(&mut self) -> Option<usize> { - use self::IndicesIter::*; + use self::IndexVecIter::*; match self { &mut U32(ref mut iter) => iter.next().map(|i| *i as usize), &mut USize(ref mut iter) => iter.next().cloned(), @@ -587,36 +600,29 @@ impl<'a> Iterator for IndicesIter<'a> { fn size_hint(&self) -> (usize, Option<usize>) { match self { - &IndicesIter::U32(ref v) => v.size_hint(), - &IndicesIter::USize(ref v) => v.size_hint(), + &IndexVecIter::U32(ref v) => v.size_hint(), + &IndexVecIter::USize(ref v) => v.size_hint(), } } } #[cfg(feature = "alloc")] -impl<'a> ExactSizeIterator for IndicesIter<'a> { - fn len(&self) -> usize { - match self { - &IndicesIter::U32(ref v) => v.len(), - &IndicesIter::USize(ref v) => v.len(), - } - } -} +impl<'a> ExactSizeIterator for IndexVecIter<'a> {} -/// Return type of `Indices::into_iter_usize`. +/// Return type of `IndexVec::into_iter`. 
#[cfg(feature = "alloc")] #[derive(Clone, Debug)] -pub enum IndicesIntoIter { +pub enum IndexVecIntoIter { #[doc(hidden)] U32(vec::IntoIter<u32>), #[doc(hidden)] USize(vec::IntoIter<usize>), } #[cfg(feature = "alloc")] -impl Iterator for IndicesIntoIter { +impl Iterator for IndexVecIntoIter { type Item = usize; fn next(&mut self) -> Option<Self::Item> { - use self::IndicesIntoIter::*; + use self::IndexVecIntoIter::*; match self { &mut U32(ref mut v) => v.next().map(|i| i as usize), &mut USize(ref mut v) => v.next(), @@ -624,7 +630,7 @@ impl Iterator for IndicesIntoIter { } fn size_hint(&self) -> (usize, Option<usize>) { - use self::IndicesIntoIter::*; + use self::IndexVecIntoIter::*; match self { &U32(ref v) => v.size_hint(), &USize(ref v) => v.size_hint(), @@ -633,15 +639,7 @@ impl Iterator for IndicesIntoIter { } #[cfg(feature = "alloc")] -impl ExactSizeIterator for IndicesIntoIter { - fn len(&self) -> usize { - use self::IndicesIntoIter::*; - match self { - &U32(ref v) => v.len(), - &USize(ref v) => v.len(), - } - } -} +impl ExactSizeIterator for IndexVecIntoIter {} /// Randomly sample exactly `amount` distinct indices from `0..length`. @@ -666,7 +664,7 @@ impl ExactSizeIterator for IndicesIntoIter { /// /// Note that performance is significantly better over `u32` indices than over /// `u64` indices. Because of this we hide the underlying type behind an -/// abstraction, `Indices`. +/// abstraction, `IndexVec`. /// /// If an allocation-free `no_std` function is required, it is suggested /// to adapt the internal `sample_indices_floyd` implementation. @@ -674,7 +672,7 @@ impl ExactSizeIterator for IndicesIntoIter { /// Panics if `amount > length`. 
#[cfg(feature = "alloc")] pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, - shuffled: bool) -> Indices + shuffled: bool) -> IndexVec where R: Rng + ?Sized, { if amount > length { @@ -725,7 +723,7 @@ pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, /// /// This implementation uses `O(amount)` memory and `O(amount^2)` time. #[cfg(feature = "alloc")] -fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Indices +fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> IndexVec where R: Rng + ?Sized, { debug_assert!(amount <= length); @@ -743,7 +741,7 @@ fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool // shuffling, but it is slow because it requires arbitrary insertions. indices.shuffle(rng); } - Indices::from(indices) + IndexVec::from(indices) } /// Randomly sample exactly `amount` indices from `0..length`, using an inplace @@ -761,7 +759,7 @@ fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool /// allocations. Set-up is `O(length)` time and memory and shuffling is /// `O(amount)` time. #[cfg(feature = "alloc")] -fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> Indices +fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec where R: Rng + ?Sized, { debug_assert!(amount <= length); @@ -773,7 +771,7 @@ fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> Indices } indices.truncate(amount as usize); debug_assert_eq!(indices.len(), amount as usize); - Indices::from(indices) + IndexVec::from(indices) } /// Randomly sample exactly `amount` indices from `0..length`, using rejection @@ -783,7 +781,7 @@ fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> Indices /// `0..length` being a duplicate. We test for duplicates and resample where /// necessary. The algorithm is `O(amount)` time and memory. 
#[cfg(feature = "alloc")] -fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) -> Indices +fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec where R: Rng + ?Sized, { debug_assert!(amount < length); @@ -800,7 +798,7 @@ fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) -> Indices } debug_assert_eq!(indices.len(), amount); - Indices::from(indices) + IndexVec::from(indices) } #[cfg(test)] @@ -963,21 +961,21 @@ mod test { assert_eq!(sample_indices_inplace(&mut r, 0, 0).len(), 0); assert_eq!(sample_indices_inplace(&mut r, 1, 0).len(), 0); - assert_eq!(sample_indices_inplace(&mut r, 1, 1).into_vec_usize(), vec![0]); + assert_eq!(sample_indices_inplace(&mut r, 1, 1).into_vec(), vec![0]); assert_eq!(sample_indices_cache(&mut r, 1, 0).len(), 0); assert_eq!(sample_indices_floyd(&mut r, 0, 0, false).len(), 0); assert_eq!(sample_indices_floyd(&mut r, 1, 0, false).len(), 0); - assert_eq!(sample_indices_floyd(&mut r, 1, 1, false).into_vec_usize(), vec![0]); + assert_eq!(sample_indices_floyd(&mut r, 1, 1, false).into_vec(), vec![0]); // These algorithms should be fast with big numbers. Test average. 
- let indices = sample_indices_cache(&mut r, 1 << 25, 10); - let sum: usize = indices.iter_usize().sum(); + let sum: usize = sample_indices_cache(&mut r, 1 << 25, 10) + .into_iter().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); - let indices = sample_indices_floyd(&mut r, 1 << 25, 10, false); - let sum: usize = indices.iter_usize().sum(); + let sum: usize = sample_indices_floyd(&mut r, 1 << 25, 10, false) + .into_iter().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); // Make sure lucky 777's aren't lucky @@ -1015,15 +1013,15 @@ mod test { let regular = sample_indices( &mut xor_rng(seed), length, amount, true); assert_eq!(regular.len(), amount); - assert!(regular.iter_usize().all(|e| e < length)); + assert!(regular.iter().all(|e| e < length)); // also test that sampling the slice works let vec: Vec<u32> = (0..(length as u32)).collect(); let result = sample_slice(&mut xor_rng(seed), &vec, amount); - assert_eq!(result, regular.iter_usize().map(|i| i as u32).collect::<Vec<_>>()); + assert_eq!(result, regular.iter().map(|i| i as u32).collect::<Vec<_>>()); let result = sample_slice_ref(&mut xor_rng(seed), &vec, amount); - assert!(result.iter().zip(regular.iter_usize()).all(|(i,j)| **i == j as u32)); + assert!(result.iter().zip(regular.iter()).all(|(i,j)| **i == j as u32)); } } @@ -1044,7 +1042,7 @@ mod test { let (length, amount): (usize, usize) = (100, 50); let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); let v2 = sample_indices_inplace(&mut xor_rng(seed), length as u32, amount as u32); - assert!(v1.iter_usize().all(|e| e < length)); + assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); // Test Floyd's alg does produce different results @@ -1056,7 +1054,7 @@ mod test { let (length, amount): (usize, usize) = (1<<20, 50); let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); let v2 = sample_indices_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); - assert!(v1.iter_usize().all(|e| e < length)); + 
assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); // A large length and larger amount should use cache @@ -1064,7 +1062,7 @@ mod test { let (length, amount): (usize, usize) = (1<<20, 600); let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount); - assert!(v1.iter_usize().all(|e| e < length)); + assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); } From fb64cf26f78f7f01beba3c242f3ecf21cffa4c46 Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Wed, 4 Jul 2018 18:03:09 +0100 Subject: [PATCH 12/14] sample_indices: add new seq::index module for index sampling --- benches/seq.rs | 16 +- src/seq/index.rs | 386 ++++++++++++++++++++++++++++++++++++ src/{seq.rs => seq/mod.rs} | 392 +------------------------------------ 3 files changed, 404 insertions(+), 390 deletions(-) create mode 100644 src/seq/index.rs rename src/{seq.rs => seq/mod.rs} (64%) diff --git a/benches/seq.rs b/benches/seq.rs index a38ad1148f3..77de182bf0f 100644 --- a/benches/seq.rs +++ b/benches/seq.rs @@ -87,16 +87,16 @@ macro_rules! 
sample_indices { fn $name(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); b.iter(|| { - $fn(&mut rng, $length, $amount, true) + index::$fn(&mut rng, $length, $amount, false) }) } } } -sample_indices!(misc_sample_indices_1_of_1k, sample_indices, 1, 1000); -sample_indices!(misc_sample_indices_10_of_1k, sample_indices, 10, 1000); -sample_indices!(misc_sample_indices_100_of_1k, sample_indices, 100, 1000); -sample_indices!(misc_sample_indices_100_of_1M, sample_indices, 100, 1000_000); -sample_indices!(misc_sample_indices_100_of_1G, sample_indices, 100, 1000_000_000); -sample_indices!(misc_sample_indices_400_of_1G, sample_indices, 400, 1000_000_000); -sample_indices!(misc_sample_indices_600_of_1G, sample_indices, 600, 1000_000_000); +sample_indices!(misc_sample_indices_1_of_1k, sample, 1, 1000); +sample_indices!(misc_sample_indices_10_of_1k, sample, 10, 1000); +sample_indices!(misc_sample_indices_100_of_1k, sample, 100, 1000); +sample_indices!(misc_sample_indices_100_of_1M, sample, 100, 1000_000); +sample_indices!(misc_sample_indices_100_of_1G, sample, 100, 1000_000_000); +sample_indices!(misc_sample_indices_400_of_1G, sample, 400, 1000_000_000); +sample_indices!(misc_sample_indices_600_of_1G, sample, 600, 1000_000_000); diff --git a/src/seq/index.rs b/src/seq/index.rs new file mode 100644 index 00000000000..aee53c498a2 --- /dev/null +++ b/src/seq/index.rs @@ -0,0 +1,386 @@ +// Copyright 2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// https://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! 
Index sampling + +#[cfg(feature="alloc")] use core::slice; + +#[cfg(feature="std")] use std::vec; +#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::{self, Vec}; +// BTreeMap is not as fast in tests, but better than nothing. +#[cfg(feature="std")] use std::collections::{HashSet}; +#[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet; + +#[cfg(feature="alloc")] use distributions::{Distribution, Uniform}; +use Rng; + +/// A vector of indices. +/// +/// Multiple internal representations are possible. +#[derive(Clone, Debug)] +pub enum IndexVec { + #[doc(hidden)] U32(Vec<u32>), + #[doc(hidden)] USize(Vec<usize>), +} + +impl IndexVec { + /// Returns the number of indices + pub fn len(&self) -> usize { + match self { + &IndexVec::U32(ref v) => v.len(), + &IndexVec::USize(ref v) => v.len(), + } + } + + /// Return the value at the given `index`. + /// + /// (Note: we cannot implement `std::ops::Index` because of lifetime + /// restrictions.) + pub fn index(&self, index: usize) -> usize { + match self { + &IndexVec::U32(ref v) => v[index] as usize, + &IndexVec::USize(ref v) => v[index], + } + } + + /// Return result as a `Vec<usize>`. Conversion may or may not be trivial. 
+ pub fn into_vec(self) -> Vec<usize> { + match self { + IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), + IndexVec::USize(v) => v, + } + } + + /// Iterate over the indices as a sequence of `usize` values + pub fn iter<'a>(&'a self) -> IndexVecIter<'a> { + match self { + &IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), + &IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), + } + } + + /// Convert into an iterator over the indices as a sequence of `usize` values + pub fn into_iter(self) -> IndexVecIntoIter { + match self { + IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()), + IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()), + } + } +} + +impl PartialEq for IndexVec { + fn eq(&self, other: &IndexVec) -> bool { + use self::IndexVec::*; + match (self, other) { + (&U32(ref v1), &U32(ref v2)) => v1 == v2, + (&USize(ref v1), &USize(ref v2)) => v1 == v2, + (&U32(ref v1), &USize(ref v2)) => (v1.len() == v2.len()) + && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)), + (&USize(ref v1), &U32(ref v2)) => (v1.len() == v2.len()) + && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)), + } + } +} + +impl From<Vec<u32>> for IndexVec { + fn from(v: Vec<u32>) -> Self { + IndexVec::U32(v) + } +} + +impl From<Vec<usize>> for IndexVec { + fn from(v: Vec<usize>) -> Self { + IndexVec::USize(v) + } +} + +/// Return type of `IndexVec::iter`. 
+#[derive(Debug)] +pub enum IndexVecIter<'a> { + #[doc(hidden)] U32(slice::Iter<'a, u32>), + #[doc(hidden)] USize(slice::Iter<'a, usize>), +} + +impl<'a> Iterator for IndexVecIter<'a> { + type Item = usize; + fn next(&mut self) -> Option<usize> { + use self::IndexVecIter::*; + match self { + &mut U32(ref mut iter) => iter.next().map(|i| *i as usize), + &mut USize(ref mut iter) => iter.next().cloned(), + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + match self { + &IndexVecIter::U32(ref v) => v.size_hint(), + &IndexVecIter::USize(ref v) => v.size_hint(), + } + } +} + +impl<'a> ExactSizeIterator for IndexVecIter<'a> {} + +/// Return type of `IndexVec::into_iter`. +#[derive(Clone, Debug)] +pub enum IndexVecIntoIter { + #[doc(hidden)] U32(vec::IntoIter<u32>), + #[doc(hidden)] USize(vec::IntoIter<usize>), +} + +impl Iterator for IndexVecIntoIter { + type Item = usize; + + fn next(&mut self) -> Option<Self::Item> { + use self::IndexVecIntoIter::*; + match self { + &mut U32(ref mut v) => v.next().map(|i| i as usize), + &mut USize(ref mut v) => v.next(), + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + use self::IndexVecIntoIter::*; + match self { + &U32(ref v) => v.size_hint(), + &USize(ref v) => v.size_hint(), + } + } +} + +impl ExactSizeIterator for IndexVecIntoIter {} + + +/// Randomly sample exactly `amount` distinct indices from `0..length`. +/// +/// If `shuffled == true` then the sampled values will be fully shuffled; +/// otherwise the values may only partially shuffled, depending on the +/// algorithm used (i.e. biases may exist in the ordering of sampled elements). +/// Depending on the algorithm used internally, full shuffling may add +/// significant overhead for `amount` > 10 or so, but not more than double +/// the time and often much less. +/// +/// This method is used internally by the slice sampling methods, but it can +/// sometimes be useful to have the indices themselves so this is provided as +/// an alternative. 
+/// +/// The implementation used is not specified; we automatically select the +/// fastest available implementation for the `length` and `amount` parameters +/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking, +/// complexity is `O(amount)`, except that when `amount` is small, performance +/// is closer to `O(amount^2)`, and when `length` is close to `amount` then +/// `O(length)`. +/// +/// Note that performance is significantly better over `u32` indices than over +/// `u64` indices. Because of this we hide the underlying type behind an +/// abstraction, `IndexVec`. +/// +/// If an allocation-free `no_std` function is required, it is suggested +/// to adapt the internal `sample_floyd` implementation. +/// +/// Panics if `amount > length`. +pub fn sample<R>(rng: &mut R, length: usize, amount: usize, + shuffled: bool) -> IndexVec + where R: Rng + ?Sized, +{ + if amount > length { + panic!("`amount` of samples must be less than or equal to `length`"); + } + if length > (::core::u32::MAX as usize) { + // We never want to use inplace here, but could use floyd's alg + // Lazy version: always use the cache alg. + return sample_rejection(rng, length, amount); + } + let amount = amount as u32; + let length = length as u32; + + // Choice of algorithm here depends on both length and amount. See: + // https://github.com/rust-lang-nursery/rand/pull/479 + // We do some calculations with f32. Accuracy is not very important. 
+ + if amount < 442 { + const C: [[f32; 2]; 2] = [[1.2, 6.0/45.0], [10.0, 70.0/9.0]]; + let j = if length < 500_000 { 0 } else { 1 }; + let amount_fp = amount as f32; + let m4 = C[0][j] * amount_fp; + // Short-cut: when amount < 12, floyd's is always faster + if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp { + sample_inplace(rng, length, amount) + } else { + sample_floyd(rng, length, amount, shuffled) + } + } else { + const C: [f32; 2] = [590.0, 600.0/9.0]; + let j = if length < 500_000 { 0 } else { 1 }; + if (length as f32) < C[j] * (amount as f32) { + sample_inplace(rng, length, amount) + } else { + // note: could have a specific u32 impl, but I'm lazy and + // generics don't have usable conversions + sample_rejection(rng, length as usize, amount as usize) + } + } +} + +/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's +/// combination algorithm. +/// +/// If `shuffled == false`, the values are only partially shuffled (i.e. biases +/// exist in the ordering of sampled elements). If `shuffled == true`, the +/// values are fully shuffled. +/// +/// This implementation uses `O(amount)` memory and `O(amount^2)` time. +fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> IndexVec + where R: Rng + ?Sized, +{ + debug_assert!(amount <= length); + let mut indices = Vec::with_capacity(amount as usize); + for j in length - amount .. length { + let t = rng.gen_range(0, j + 1); + if indices.contains(&t) { + indices.push(j) + } else { + indices.push(t) + }; + } + if shuffled { + // Note that there is a variant of Floyd's algorithm with native full + // shuffling, but it is slow because it requires arbitrary insertions. + use super::SliceRandom; + indices.shuffle(rng); + } + IndexVec::from(indices) +} + +/// Randomly sample exactly `amount` indices from `0..length`, using an inplace +/// partial Fisher-Yates method. +/// Sample an amount of indices using an inplace partial fisher yates method. 
+/// +/// This allocates the entire `length` of indices and randomizes only the first `amount`. +/// It then truncates to `amount` and returns. +/// +/// This method is not appropriate for large `length` and potentially uses a lot +/// of memory; because of this we only implement for `u32` index (which improves +/// performance in all cases). +/// +/// This is likely the fastest for small lengths since it avoids the need for +/// allocations. Set-up is `O(length)` time and memory and shuffling is +/// `O(amount)` time. +fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec + where R: Rng + ?Sized, +{ + debug_assert!(amount <= length); + let mut indices: Vec<u32> = Vec::with_capacity(length as usize); + indices.extend(0..length); + for i in 0..amount { + let j: u32 = rng.gen_range(i, length); + indices.swap(i as usize, j as usize); + } + indices.truncate(amount as usize); + debug_assert_eq!(indices.len(), amount as usize); + IndexVec::from(indices) +} + +/// Randomly sample exactly `amount` indices from `0..length`, using rejection +/// sampling. +/// +/// Since `amount <<< length` there is a low chance of a random sample in +/// `0..length` being a duplicate. We test for duplicates and resample where +/// necessary. The algorithm is `O(amount)` time and memory. 
+fn sample_rejection<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec + where R: Rng + ?Sized, +{ + debug_assert!(amount < length); + #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount); + #[cfg(not(feature="std"))] let mut cache = BTreeSet::new(); + let distr = Uniform::new(0, length); + let mut indices = Vec::with_capacity(amount); + for _ in 0..amount { + let mut pos = distr.sample(rng); + while !cache.insert(pos) { + pos = distr.sample(rng); + } + indices.push(pos); + } + + debug_assert_eq!(indices.len(), amount); + IndexVec::from(indices) +} + +#[cfg(test)] +mod test { + use super::*; + use {Rng, SeedableRng}; + use prng::XorShiftRng; + + #[test] + fn test_sample_boundaries() { + let mut r = ::test::rng(404); + + assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0); + assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0); + assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]); + + assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0); + + assert_eq!(sample_floyd(&mut r, 0, 0, false).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 0, false).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 1, false).into_vec(), vec![0]); + + // These algorithms should be fast with big numbers. Test average. + let sum: usize = sample_rejection(&mut r, 1 << 25, 10) + .into_iter().sum(); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); + + let sum: usize = sample_floyd(&mut r, 1 << 25, 10, false) + .into_iter().sum(); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); + } + + #[test] + fn test_sample_alg() { + let xor_rng = XorShiftRng::from_seed; + + let mut r = ::test::rng(403); + let mut seed = [0u8; 16]; + + // We can't test which algorithm is used directly, but Floyd's alg + // should produce different results from the others. (Also, `inplace` + // and `cached` currently use different sizes thus produce different results.) 
+ + // A small length and relatively large amount should use inplace + r.fill(&mut seed); + let (length, amount): (usize, usize) = (100, 50); + let v1 = sample(&mut xor_rng(seed), length, amount, true); + let v2 = sample_inplace(&mut xor_rng(seed), length as u32, amount as u32); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + + // Test Floyd's alg does produce different results + let v3 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); + assert!(v1 != v3); + + // A large length and small amount should use Floyd + r.fill(&mut seed); + let (length, amount): (usize, usize) = (1<<20, 50); + let v1 = sample(&mut xor_rng(seed), length, amount, true); + let v2 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + + // A large length and larger amount should use cache + r.fill(&mut seed); + let (length, amount): (usize, usize) = (1<<20, 600); + let v1 = sample(&mut xor_rng(seed), length, amount, true); + let v2 = sample_rejection(&mut xor_rng(seed), length, amount); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + } +} diff --git a/src/seq.rs b/src/seq/mod.rs similarity index 64% rename from src/seq.rs rename to src/seq/mod.rs index 3c48a2135d9..830ddcaf6ea 100644 --- a/src/seq.rs +++ b/src/seq/mod.rs @@ -12,18 +12,14 @@ //! //! TODO: module doc + +#[cfg(feature="alloc")] pub mod index; + #[cfg(feature="alloc")] use core::ops::Index; -#[cfg(feature="alloc")] use core::slice; -#[cfg(feature="std")] use std::vec; -#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::Vec; -// BTreeMap is not as fast in tests, but better than nothing. 
-#[cfg(feature="std")] use std::collections::{HashSet}; -#[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet; use Rng; -#[cfg(feature="alloc")] use distributions::{Distribution, Uniform}; #[cfg(feature="alloc")] use distributions::WeightedError; #[cfg(feature="alloc")] use distributions::uniform::{SampleUniform, SampleBorrow}; @@ -64,7 +60,7 @@ pub trait SliceRandom { /// Produces an iterator that chooses `amount` elements from the slice at /// random without repeating any. /// - /// In case this API is not sufficiently flexible, use `sample_indices` then + /// In case this API is not sufficiently flexible, use `index::sample` then /// apply the indices to the slice. /// /// If `shuffled == true` then the sampled values will be fully shuffled; @@ -74,7 +70,7 @@ pub trait SliceRandom { /// may add significant overhead for `amount` > 10 or so, but not more /// than double the time and often much less. /// - /// Complexity is expected to be the same as `sample_indices`. + /// Complexity is expected to be the same as `index::sample`. 
/// /// # Example /// ``` @@ -327,7 +323,7 @@ impl<T> SliceRandom for [T] { SliceChooseIter { slice: self, _phantom: Default::default(), - indices: sample_indices(rng, self.len(), amount, shuffled).into_iter(), + indices: index::sample(rng, self.len(), amount, shuffled).into_iter(), } } @@ -399,7 +395,7 @@ impl<I> IteratorRandom for I where I: Iterator + Sized {} pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { slice: &'a S, _phantom: ::core::marker::PhantomData<T>, - indices: IndexVecIntoIter, + indices: index::IndexVecIntoIter, } #[cfg(feature = "alloc")] @@ -464,7 +460,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> where R: Rng + ?Sized, T: Clone { - let indices = sample_indices(rng, slice.len(), amount, true).into_iter(); + let indices = index::sample(rng, slice.len(), amount, true).into_iter(); let mut out = Vec::with_capacity(amount); out.extend(indices.map(|i| slice[i].clone())); @@ -487,320 +483,13 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> where R: Rng + ?Sized { - let indices = sample_indices(rng, slice.len(), amount, true).into_iter(); + let indices = index::sample(rng, slice.len(), amount, true).into_iter(); let mut out = Vec::with_capacity(amount); out.extend(indices.map(|i| &slice[i])); out } -/// A vector of indices. -/// -/// Multiple internal representations are possible. -#[cfg(feature = "alloc")] -#[derive(Clone, Debug)] -pub enum IndexVec { - #[doc(hidden)] U32(Vec<u32>), - #[doc(hidden)] USize(Vec<usize>), -} - -#[cfg(feature = "alloc")] -impl IndexVec { - /// Returns the number of indices - pub fn len(&self) -> usize { - match self { - &IndexVec::U32(ref v) => v.len(), - &IndexVec::USize(ref v) => v.len(), - } - } - - /// Return the value at the given `index`. - /// - /// (Note: we cannot implement `std::ops::Index` because of lifetime - /// restrictions.) 
- pub fn index(&self, index: usize) -> usize { - match self { - &IndexVec::U32(ref v) => v[index] as usize, - &IndexVec::USize(ref v) => v[index], - } - } - - /// Return result as a `Vec<usize>`. Conversion may or may not be trivial. - pub fn into_vec(self) -> Vec<usize> { - match self { - IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), - IndexVec::USize(v) => v, - } - } - - /// Iterate over the indices as a sequence of `usize` values - pub fn iter<'a>(&'a self) -> IndexVecIter<'a> { - match self { - &IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), - &IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), - } - } - - /// Convert into an iterator over the indices as a sequence of `usize` values - pub fn into_iter(self) -> IndexVecIntoIter { - match self { - IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()), - IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()), - } - } -} - -#[cfg(feature = "alloc")] -impl PartialEq for IndexVec { - fn eq(&self, other: &IndexVec) -> bool { - use self::IndexVec::*; - match (self, other) { - (&U32(ref v1), &U32(ref v2)) => v1 == v2, - (&USize(ref v1), &USize(ref v2)) => v1 == v2, - (&U32(ref v1), &USize(ref v2)) => (v1.len() == v2.len()) - && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)), - (&USize(ref v1), &U32(ref v2)) => (v1.len() == v2.len()) - && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)), - } - } -} - -#[cfg(feature = "alloc")] -impl From<Vec<u32>> for IndexVec { - fn from(v: Vec<u32>) -> Self { - IndexVec::U32(v) - } -} - -#[cfg(feature = "alloc")] -impl From<Vec<usize>> for IndexVec { - fn from(v: Vec<usize>) -> Self { - IndexVec::USize(v) - } -} - -/// Return type of `IndexVec::iter`. 
-#[cfg(feature = "alloc")] -#[derive(Debug)] -pub enum IndexVecIter<'a> { - #[doc(hidden)] U32(slice::Iter<'a, u32>), - #[doc(hidden)] USize(slice::Iter<'a, usize>), -} - -#[cfg(feature = "alloc")] -impl<'a> Iterator for IndexVecIter<'a> { - type Item = usize; - fn next(&mut self) -> Option<usize> { - use self::IndexVecIter::*; - match self { - &mut U32(ref mut iter) => iter.next().map(|i| *i as usize), - &mut USize(ref mut iter) => iter.next().cloned(), - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - match self { - &IndexVecIter::U32(ref v) => v.size_hint(), - &IndexVecIter::USize(ref v) => v.size_hint(), - } - } -} - -#[cfg(feature = "alloc")] -impl<'a> ExactSizeIterator for IndexVecIter<'a> {} - -/// Return type of `IndexVec::into_iter`. -#[cfg(feature = "alloc")] -#[derive(Clone, Debug)] -pub enum IndexVecIntoIter { - #[doc(hidden)] U32(vec::IntoIter<u32>), - #[doc(hidden)] USize(vec::IntoIter<usize>), -} - -#[cfg(feature = "alloc")] -impl Iterator for IndexVecIntoIter { - type Item = usize; - - fn next(&mut self) -> Option<Self::Item> { - use self::IndexVecIntoIter::*; - match self { - &mut U32(ref mut v) => v.next().map(|i| i as usize), - &mut USize(ref mut v) => v.next(), - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - use self::IndexVecIntoIter::*; - match self { - &U32(ref v) => v.size_hint(), - &USize(ref v) => v.size_hint(), - } - } -} - -#[cfg(feature = "alloc")] -impl ExactSizeIterator for IndexVecIntoIter {} - - -/// Randomly sample exactly `amount` distinct indices from `0..length`. -/// -/// If `shuffled == true` then the sampled values will be fully shuffled; -/// otherwise the values may only partially shuffled, depending on the -/// algorithm used (i.e. biases may exist in the ordering of sampled elements). -/// Depending on the algorithm used internally, full shuffling may add -/// significant overhead for `amount` > 10 or so, but not more than double -/// the time and often much less. 
-/// -/// This method is used internally by the slice sampling methods, but it can -/// sometimes be useful to have the indices themselves so this is provided as -/// an alternative. -/// -/// The implementation used is not specified; we automatically select the -/// fastest available implementation for the `length` and `amount` parameters -/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking, -/// complexity is `O(amount)`, except that when `amount` is small, performance -/// is closer to `O(amount^2)`, and when `length` is close to `amount` then -/// `O(length)`. -/// -/// Note that performance is significantly better over `u32` indices than over -/// `u64` indices. Because of this we hide the underlying type behind an -/// abstraction, `IndexVec`. -/// -/// If an allocation-free `no_std` function is required, it is suggested -/// to adapt the internal `sample_indices_floyd` implementation. -/// -/// Panics if `amount > length`. -#[cfg(feature = "alloc")] -pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize, - shuffled: bool) -> IndexVec - where R: Rng + ?Sized, -{ - if amount > length { - panic!("`amount` of samples must be less than or equal to `length`"); - } - if length > (::core::u32::MAX as usize) { - // We never want to use inplace here, but could use floyd's alg - // Lazy version: always use the cache alg. - return sample_indices_cache(rng, length, amount); - } - let amount = amount as u32; - let length = length as u32; - - // Choice of algorithm here depends on both length and amount. See: - // https://github.com/rust-lang-nursery/rand/pull/479 - // We do some calculations with f32. Accuracy is not very important. 
- - if amount < 442 { - const C: [[f32; 2]; 2] = [[1.2, 6.0/45.0], [10.0, 70.0/9.0]]; - let j = if length < 500_000 { 0 } else { 1 }; - let amount_fp = amount as f32; - let m4 = C[0][j] * amount_fp; - // Short-cut: when amount < 12, floyd's is always faster - if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp { - sample_indices_inplace(rng, length, amount) - } else { - sample_indices_floyd(rng, length, amount, shuffled) - } - } else { - const C: [f32; 2] = [590.0, 600.0/9.0]; - let j = if length < 500_000 { 0 } else { 1 }; - if (length as f32) < C[j] * (amount as f32) { - sample_indices_inplace(rng, length, amount) - } else { - // note: could have a specific u32 impl, but I'm lazy and - // generics don't have usable conversions - sample_indices_cache(rng, length as usize, amount as usize) - } - } -} - -/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's -/// combination algorithm. -/// -/// If `shuffled == false`, the values are only partially shuffled (i.e. biases -/// exist in the ordering of sampled elements). If `shuffled == true`, the -/// values are fully shuffled. -/// -/// This implementation uses `O(amount)` memory and `O(amount^2)` time. -#[cfg(feature = "alloc")] -fn sample_indices_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> IndexVec - where R: Rng + ?Sized, -{ - debug_assert!(amount <= length); - let mut indices = Vec::with_capacity(amount as usize); - for j in length - amount .. length { - let t = rng.gen_range(0, j + 1); - if indices.contains(&t) { - indices.push(j) - } else { - indices.push(t) - }; - } - if shuffled { - // Note that there is a variant of Floyd's algorithm with native full - // shuffling, but it is slow because it requires arbitrary insertions. - indices.shuffle(rng); - } - IndexVec::from(indices) -} - -/// Randomly sample exactly `amount` indices from `0..length`, using an inplace -/// partial Fisher-Yates method. 
-/// Sample an amount of indices using an inplace partial fisher yates method. -/// -/// This allocates the entire `length` of indices and randomizes only the first `amount`. -/// It then truncates to `amount` and returns. -/// -/// This method is not appropriate for large `length` and potentially uses a lot -/// of memory; because of this we only implement for `u32` index (which improves -/// performance in all cases). -/// -/// This is likely the fastest for small lengths since it avoids the need for -/// allocations. Set-up is `O(length)` time and memory and shuffling is -/// `O(amount)` time. -#[cfg(feature = "alloc")] -fn sample_indices_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec - where R: Rng + ?Sized, -{ - debug_assert!(amount <= length); - let mut indices: Vec<u32> = Vec::with_capacity(length as usize); - indices.extend(0..length); - for i in 0..amount { - let j: u32 = rng.gen_range(i, length); - indices.swap(i as usize, j as usize); - } - indices.truncate(amount as usize); - debug_assert_eq!(indices.len(), amount as usize); - IndexVec::from(indices) -} - -/// Randomly sample exactly `amount` indices from `0..length`, using rejection -/// sampling. -/// -/// Since `amount <<< length` there is a low chance of a random sample in -/// `0..length` being a duplicate. We test for duplicates and resample where -/// necessary. The algorithm is `O(amount)` time and memory. 
-#[cfg(feature = "alloc")] -fn sample_indices_cache<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec - where R: Rng + ?Sized, -{ - debug_assert!(amount < length); - #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount); - #[cfg(not(feature="std"))] let mut cache = BTreeSet::new(); - let distr = Uniform::new(0, length); - let mut indices = Vec::with_capacity(amount); - for _ in 0..amount { - let mut pos = distr.sample(rng); - while !cache.insert(pos) { - pos = distr.sample(rng); - } - indices.push(pos); - } - - debug_assert_eq!(indices.len(), amount); - IndexVec::from(indices) -} - #[cfg(test)] mod test { use super::*; @@ -855,7 +544,6 @@ mod test { #[test] fn test_shuffle() { - let mut r = ::test::rng(108); let empty: &mut [isize] = &mut []; empty.shuffle(&mut r); @@ -959,25 +647,6 @@ mod test { let v = sample_slice(&mut r, &[42, 133], 2); assert!(&v[..] == [42, 133] || v[..] == [133, 42]); - assert_eq!(sample_indices_inplace(&mut r, 0, 0).len(), 0); - assert_eq!(sample_indices_inplace(&mut r, 1, 0).len(), 0); - assert_eq!(sample_indices_inplace(&mut r, 1, 1).into_vec(), vec![0]); - - assert_eq!(sample_indices_cache(&mut r, 1, 0).len(), 0); - - assert_eq!(sample_indices_floyd(&mut r, 0, 0, false).len(), 0); - assert_eq!(sample_indices_floyd(&mut r, 1, 0, false).len(), 0); - assert_eq!(sample_indices_floyd(&mut r, 1, 1, false).into_vec(), vec![0]); - - // These algorithms should be fast with big numbers. Test average. 
- let sum: usize = sample_indices_cache(&mut r, 1 << 25, 10) - .into_iter().sum(); - assert!(1 << 25 < sum && sum < (1 << 25) * 25); - - let sum: usize = sample_indices_floyd(&mut r, 1 << 25, 10, false) - .into_iter().sum(); - assert!(1 << 25 < sum && sum < (1 << 25) * 25); - // Make sure lucky 777's aren't lucky let slice = &[42, 777]; let mut num_42 = 0; @@ -1010,7 +679,7 @@ mod test { r.fill(&mut seed); // assert the basics work - let regular = sample_indices( + let regular = index::sample( &mut xor_rng(seed), length, amount, true); assert_eq!(regular.len(), amount); assert!(regular.iter().all(|e| e < length)); @@ -1025,47 +694,6 @@ mod test { } } - #[test] - #[cfg(feature = "alloc")] - fn test_sample_alg() { - let xor_rng = XorShiftRng::from_seed; - - let mut r = ::test::rng(403); - let mut seed = [0u8; 16]; - - // We can't test which algorithm is used directly, but Floyd's alg - // should produce different results from the others. (Also, `inplace` - // and `cached` currently use different sizes thus produce different results.) 
- - // A small length and relatively large amount should use inplace - r.fill(&mut seed); - let (length, amount): (usize, usize) = (100, 50); - let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); - let v2 = sample_indices_inplace(&mut xor_rng(seed), length as u32, amount as u32); - assert!(v1.iter().all(|e| e < length)); - assert_eq!(v1, v2); - - // Test Floyd's alg does produce different results - let v3 = sample_indices_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); - assert!(v1 != v3); - - // A large length and small amount should use Floyd - r.fill(&mut seed); - let (length, amount): (usize, usize) = (1<<20, 50); - let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); - let v2 = sample_indices_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); - assert!(v1.iter().all(|e| e < length)); - assert_eq!(v1, v2); - - // A large length and larger amount should use cache - r.fill(&mut seed); - let (length, amount): (usize, usize) = (1<<20, 600); - let v1 = sample_indices(&mut xor_rng(seed), length, amount, true); - let v2 = sample_indices_cache(&mut xor_rng(seed), length, amount); - assert!(v1.iter().all(|e| e < length)); - assert_eq!(v1, v2); - } - #[test] #[cfg(feature = "alloc")] fn test_weighted() { From 805022c0964c0e740f112578e69b68bb970cc03c Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Thu, 5 Jul 2018 09:10:12 +0100 Subject: [PATCH 13/14] sample_indices: update model parameters This accounts for the "cache" method being replaced by rejection sampling and now using usize again. --- src/seq/index.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/seq/index.rs b/src/seq/index.rs index aee53c498a2..29953704337 100644 --- a/src/seq/index.rs +++ b/src/seq/index.rs @@ -205,7 +205,7 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize, // https://github.com/rust-lang-nursery/rand/pull/479 // We do some calculations with f32. Accuracy is not very important. 
- if amount < 442 { + if amount < 217 { const C: [[f32; 2]; 2] = [[1.2, 6.0/45.0], [10.0, 70.0/9.0]]; let j = if length < 500_000 { 0 } else { 1 }; let amount_fp = amount as f32; @@ -217,7 +217,7 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize, sample_floyd(rng, length, amount, shuffled) } } else { - const C: [f32; 2] = [590.0, 600.0/9.0]; + const C: [f32; 2] = [270.0, 330.0/9.0]; let j = if length < 500_000 { 0 } else { 1 }; if (length as f32) < C[j] * (amount as f32) { sample_inplace(rng, length, amount) From 19897e53c1908c5a193e2aacff59170e3e72b8de Mon Sep 17 00:00:00 2001 From: Diggory Hardy <git@dhardy.name> Date: Thu, 5 Jul 2018 09:31:00 +0100 Subject: [PATCH 14/14] sample_indices: always shuffle. Floyd's alg: optimise. --- benches/seq.rs | 5 +-- src/seq/index.rs | 90 +++++++++++++++++++++++++++--------------------- src/seq/mod.rs | 28 ++++++--------- 3 files changed, 64 insertions(+), 59 deletions(-) diff --git a/benches/seq.rs b/benches/seq.rs index 77de182bf0f..f143131763b 100644 --- a/benches/seq.rs +++ b/benches/seq.rs @@ -39,7 +39,7 @@ macro_rules! seq_slice_choose_multiple { // Collect full result to prevent unwanted shortcuts getting // first element (in case sample_indices returns an iterator). for (slot, sample) in result.iter_mut().zip( - x.choose_multiple(&mut rng, $amount, false)) { + x.choose_multiple(&mut rng, $amount)) { *slot = *sample; } result[$amount-1] @@ -87,7 +87,7 @@ macro_rules! 
sample_indices { fn $name(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); b.iter(|| { - index::$fn(&mut rng, $length, $amount, false) + index::$fn(&mut rng, $length, $amount) }) } } @@ -98,5 +98,6 @@ sample_indices!(misc_sample_indices_10_of_1k, sample, 10, 1000); sample_indices!(misc_sample_indices_100_of_1k, sample, 100, 1000); sample_indices!(misc_sample_indices_100_of_1M, sample, 100, 1000_000); sample_indices!(misc_sample_indices_100_of_1G, sample, 100, 1000_000_000); +sample_indices!(misc_sample_indices_200_of_1G, sample, 200, 1000_000_000); sample_indices!(misc_sample_indices_400_of_1G, sample, 400, 1000_000_000); sample_indices!(misc_sample_indices_600_of_1G, sample, 600, 1000_000_000); diff --git a/src/seq/index.rs b/src/seq/index.rs index 29953704337..805b7f3c2f3 100644 --- a/src/seq/index.rs +++ b/src/seq/index.rs @@ -158,21 +158,15 @@ impl Iterator for IndexVecIntoIter { impl ExactSizeIterator for IndexVecIntoIter {} -/// Randomly sample exactly `amount` distinct indices from `0..length`. -/// -/// If `shuffled == true` then the sampled values will be fully shuffled; -/// otherwise the values may only partially shuffled, depending on the -/// algorithm used (i.e. biases may exist in the ordering of sampled elements). -/// Depending on the algorithm used internally, full shuffling may add -/// significant overhead for `amount` > 10 or so, but not more than double -/// the time and often much less. +/// Randomly sample exactly `amount` distinct indices from `0..length`, and +/// return them in random order (fully shuffled). /// /// This method is used internally by the slice sampling methods, but it can /// sometimes be useful to have the indices themselves so this is provided as /// an alternative. 
/// /// The implementation used is not specified; we automatically select the -/// fastest available implementation for the `length` and `amount` parameters +/// fastest available algorithm for the `length` and `amount` parameters /// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking, /// complexity is `O(amount)`, except that when `amount` is small, performance /// is closer to `O(amount^2)`, and when `length` is close to `amount` then @@ -186,8 +180,7 @@ impl ExactSizeIterator for IndexVecIntoIter {} /// to adapt the internal `sample_floyd` implementation. /// /// Panics if `amount > length`. -pub fn sample<R>(rng: &mut R, length: usize, amount: usize, - shuffled: bool) -> IndexVec +pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec where R: Rng + ?Sized, { if amount > length { @@ -205,8 +198,8 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize, // https://github.com/rust-lang-nursery/rand/pull/479 // We do some calculations with f32. Accuracy is not very important. - if amount < 217 { - const C: [[f32; 2]; 2] = [[1.2, 6.0/45.0], [10.0, 70.0/9.0]]; + if amount < 163 { + const C: [[f32; 2]; 2] = [[1.6, 8.0/45.0], [10.0, 70.0/9.0]]; let j = if length < 500_000 { 0 } else { 1 }; let amount_fp = amount as f32; let m4 = C[0][j] * amount_fp; @@ -214,7 +207,7 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize, if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp { sample_inplace(rng, length, amount) } else { - sample_floyd(rng, length, amount, shuffled) + sample_floyd(rng, length, amount) } } else { const C: [f32; 2] = [270.0, 330.0/9.0]; @@ -232,29 +225,50 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize, /// Randomly sample exactly `amount` indices from `0..length`, using Floyd's /// combination algorithm. /// -/// If `shuffled == false`, the values are only partially shuffled (i.e. biases -/// exist in the ordering of sampled elements). 
If `shuffled == true`, the -/// values are fully shuffled. +/// The output values are fully shuffled. (Overhead is under 50%.) /// /// This implementation uses `O(amount)` memory and `O(amount^2)` time. -fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> IndexVec +fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec where R: Rng + ?Sized, { + // Shouldn't this be on std::slice? + fn find_pos<T: Copy + PartialEq<T>>(slice: &[T], elt: T) -> Option<usize> { + for i in 0..slice.len() { + if slice[i] == elt { + return Some(i); + } + } + None + } + + // For small amount we use Floyd's fully-shuffled variant. For larger + // amounts this is slow due to Vec::insert performance, so we shuffle + // afterwards. Benchmarks show little overhead from extra logic. + let floyd_shuffle = amount < 50; + debug_assert!(amount <= length); let mut indices = Vec::with_capacity(amount as usize); for j in length - amount .. length { let t = rng.gen_range(0, j + 1); - if indices.contains(&t) { - indices.push(j) + if floyd_shuffle { + if let Some(pos) = find_pos(&indices, t) { + indices.insert(pos, j); + continue; + } } else { - indices.push(t) - }; + if indices.contains(&t) { + indices.push(j); + continue; + } + } + indices.push(t); } - if shuffled { - // Note that there is a variant of Floyd's algorithm with native full - // shuffling, but it is slow because it requires arbitrary insertions. - use super::SliceRandom; - indices.shuffle(rng); + if !floyd_shuffle { + // Reimplement SliceRandom::shuffle with smaller indices + for i in (1..amount).rev() { + // invariant: elements with index > i have been locked in place. + indices.swap(i as usize, rng.gen_range(0, i + 1) as usize); + } } IndexVec::from(indices) } @@ -270,9 +284,7 @@ fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Ind /// of memory; because of this we only implement for `u32` index (which improves /// performance in all cases). 
/// -/// This is likely the fastest for small lengths since it avoids the need for -/// allocations. Set-up is `O(length)` time and memory and shuffling is -/// `O(amount)` time. +/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time. fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec where R: Rng + ?Sized, { @@ -330,16 +342,16 @@ mod test { assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0); - assert_eq!(sample_floyd(&mut r, 0, 0, false).len(), 0); - assert_eq!(sample_floyd(&mut r, 1, 0, false).len(), 0); - assert_eq!(sample_floyd(&mut r, 1, 1, false).into_vec(), vec![0]); + assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]); // These algorithms should be fast with big numbers. Test average. let sum: usize = sample_rejection(&mut r, 1 << 25, 10) .into_iter().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); - let sum: usize = sample_floyd(&mut r, 1 << 25, 10, false) + let sum: usize = sample_floyd(&mut r, 1 << 25, 10) .into_iter().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); } @@ -358,27 +370,27 @@ mod test { // A small length and relatively large amount should use inplace r.fill(&mut seed); let (length, amount): (usize, usize) = (100, 50); - let v1 = sample(&mut xor_rng(seed), length, amount, true); + let v1 = sample(&mut xor_rng(seed), length, amount); let v2 = sample_inplace(&mut xor_rng(seed), length as u32, amount as u32); assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); // Test Floyd's alg does produce different results - let v3 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); + let v3 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32); assert!(v1 != v3); // A large length and small amount should use Floyd r.fill(&mut seed); let (length, amount): (usize, usize) = (1<<20, 50); - let v1 = sample(&mut xor_rng(seed), length, amount, true); - let v2 
= sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true); + let v1 = sample(&mut xor_rng(seed), length, amount); + let v2 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32); assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); // A large length and larger amount should use cache r.fill(&mut seed); let (length, amount): (usize, usize) = (1<<20, 600); - let v1 = sample(&mut xor_rng(seed), length, amount, true); + let v1 = sample(&mut xor_rng(seed), length, amount); let v2 = sample_rejection(&mut xor_rng(seed), length, amount); assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 830ddcaf6ea..4e06bac2863 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -58,18 +58,11 @@ pub trait SliceRandom { where R: Rng + ?Sized; /// Produces an iterator that chooses `amount` elements from the slice at - /// random without repeating any. - /// + /// random without repeating any, and returns them in random order. + /// /// In case this API is not sufficiently flexible, use `index::sample` then /// apply the indices to the slice. /// - /// If `shuffled == true` then the sampled values will be fully shuffled; - /// otherwise the values may only partially shuffled, depending on the - /// algorithm used (i.e. biases may exist in the ordering of sampled - /// elements). Depending on the algorithm used internally, full shuffling - /// may add significant overhead for `amount` > 10 or so, but not more - /// than double the time and often much less. - /// /// Complexity is expected to be the same as `index::sample`. 
/// /// # Example @@ -80,16 +73,16 @@ pub trait SliceRandom { /// let sample = "Hello, audience!".as_bytes(); /// /// // collect the results into a vector: - /// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3, true).cloned().collect(); + /// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3).cloned().collect(); /// /// // store in a buffer: /// let mut buf = [0u8; 5]; - /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len(), true).zip(buf.iter_mut()) { + /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { /// *slot = *b; /// } /// ``` #[cfg(feature = "alloc")] - fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool) -> SliceChooseIter<Self, Self::Item> + fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item> where R: Rng + ?Sized; /// Similar to [`choose`], where the likelihood of each outcome may be @@ -315,7 +308,7 @@ impl<T> SliceRandom for [T] { } #[cfg(feature = "alloc")] - fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool) + fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item> where R: Rng + ?Sized { @@ -323,7 +316,7 @@ impl<T> SliceRandom for [T] { SliceChooseIter { slice: self, _phantom: Default::default(), - indices: index::sample(rng, self.len(), amount, shuffled).into_iter(), + indices: index::sample(rng, self.len(), amount).into_iter(), } } @@ -460,7 +453,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> where R: Rng + ?Sized, T: Clone { - let indices = index::sample(rng, slice.len(), amount, true).into_iter(); + let indices = index::sample(rng, slice.len(), amount).into_iter(); let mut out = Vec::with_capacity(amount); out.extend(indices.map(|i| slice[i].clone())); @@ -483,7 +476,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> 
where R: Rng + ?Sized { - let indices = index::sample(rng, slice.len(), amount, true).into_iter(); + let indices = index::sample(rng, slice.len(), amount).into_iter(); let mut out = Vec::with_capacity(amount); out.extend(indices.map(|i| &slice[i])); @@ -679,8 +672,7 @@ mod test { r.fill(&mut seed); // assert the basics work - let regular = index::sample( - &mut xor_rng(seed), length, amount, true); + let regular = index::sample(&mut xor_rng(seed), length, amount); assert_eq!(regular.len(), amount); assert!(regular.iter().all(|e| e < length));