Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve SmallRng initialization performance #1482

Merged
merged 4 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.
- Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487)
- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487)
- Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491)
- Improve SmallRng initialization performance (#1482)

## [0.9.0-alpha.1] - 2024-03-18
- Add the `Slice::num_choices` method to the Slice distribution (#1402)
Expand Down
58 changes: 57 additions & 1 deletion benches/benches/generators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use rand_pcg::{Pcg32, Pcg64, Pcg64Dxsm, Pcg64Mcg};
criterion_group!(
name = benches;
config = Criterion::default();
targets = gen_bytes, gen_u32, gen_u64, init_gen, reseeding_bytes
targets = gen_bytes, gen_u32, gen_u64, init_gen, init_from_u64, init_from_seed, reseeding_bytes
);
criterion_main!(benches);

Expand Down Expand Up @@ -133,6 +133,62 @@ pub fn init_gen(c: &mut Criterion) {
bench::<ChaCha12Rng>(&mut g, "chacha12");
bench::<ChaCha20Rng>(&mut g, "chacha20");
bench::<StdRng>(&mut g, "std");
bench::<SmallRng>(&mut g, "small");

g.finish()
}

/// Benchmarks constructing each generator via `SeedableRng::seed_from_u64`.
///
/// Registers one benchmark per generator type in the `init_from_u64` group,
/// using a 500 ms warm-up and a 1000 ms measurement window.
pub fn init_from_u64(c: &mut Criterion) {
let mut g = c.benchmark_group("init_from_u64");
g.warm_up_time(Duration::from_millis(500));
g.measurement_time(Duration::from_millis(1000));

// Registers a single benchmark called `name` that times `R::seed_from_u64`.
fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str) {
g.bench_function(name, |b| {
// The seed is drawn once, outside the timed closure, so only the
// construction of `R` is measured.
let mut rng = Pcg32::from_os_rng();
let seed = rng.random();
// `black_box` keeps the optimizer from constant-folding the seed.
b.iter(|| R::seed_from_u64(black_box(seed)));
});
}

bench::<Pcg32>(&mut g, "pcg32");
bench::<Pcg64>(&mut g, "pcg64");
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
bench::<ChaCha8Rng>(&mut g, "chacha8");
bench::<ChaCha12Rng>(&mut g, "chacha12");
bench::<ChaCha20Rng>(&mut g, "chacha20");
bench::<StdRng>(&mut g, "std");
bench::<SmallRng>(&mut g, "small");

g.finish()
}

/// Benchmarks constructing each generator via `SeedableRng::from_seed`.
///
/// Registers one benchmark per generator type in the `init_from_seed` group,
/// using a 500 ms warm-up and a 1000 ms measurement window.
pub fn init_from_seed(c: &mut Criterion) {
let mut g = c.benchmark_group("init_from_seed");
g.warm_up_time(Duration::from_millis(500));
g.measurement_time(Duration::from_millis(1000));

// Registers a single benchmark called `name` that times `R::from_seed`.
// The `where` bound lets `rng.random()` produce a value of type `R::Seed`.
fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str)
where
rand::distr::Standard: Distribution<<R as SeedableRng>::Seed>,
{
g.bench_function(name, |b| {
// The seed is drawn once, outside the timed closure.
let mut rng = Pcg32::from_os_rng();
let seed = rng.random();
// `from_seed` takes the seed by value, so it is cloned on every
// iteration; the clone is therefore part of the measured time.
b.iter(|| R::from_seed(black_box(seed.clone())));
});
}

bench::<Pcg32>(&mut g, "pcg32");
bench::<Pcg64>(&mut g, "pcg64");
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
bench::<ChaCha8Rng>(&mut g, "chacha8");
bench::<ChaCha12Rng>(&mut g, "chacha12");
bench::<ChaCha20Rng>(&mut g, "chacha20");
bench::<StdRng>(&mut g, "std");
bench::<SmallRng>(&mut g, "small");

g.finish()
}
Expand Down
3 changes: 2 additions & 1 deletion src/rngs/small.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ impl SeedableRng for SmallRng {

#[inline(always)]
fn from_seed(seed: Self::Seed) -> Self {
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap();
arthurprs marked this conversation as resolved.
Show resolved Hide resolved
// This is for compatibility with 32-bit platforms where Rng::Seed has a different seed size
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap()
const LEN: usize = core::mem::size_of::<<Rng as SeedableRng>::Seed>();
let seed = (&seed[..LEN]).try_into().unwrap();
SmallRng(Rng::from_seed(seed))
Expand Down
35 changes: 28 additions & 7 deletions src/rngs/xoshiro128plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,36 @@ impl SeedableRng for Xoshiro128PlusPlus {
/// mapped to a different seed.
#[inline]
fn from_seed(seed: [u8; 16]) -> Xoshiro128PlusPlus {
if seed.iter().all(|&x| x == 0) {
return Self::seed_from_u64(0);
}
let mut state = [0; 4];
read_u32_into(&seed, &mut state);
// Check for zero on aligned integers for better code generation.
// Furthermore, seed_from_u64(0) will expand to a constant when optimized.
if state.iter().all(|&x| x == 0) {
dhardy marked this conversation as resolved.
Show resolved Hide resolved
return Self::seed_from_u64(0);
}
Xoshiro128PlusPlus { s: state }
}

/// Create a new `Xoshiro128PlusPlus` from a `u64` seed.
///
/// This uses the SplitMix64 generator internally.
#[inline]
fn seed_from_u64(mut state: u64) -> Self {
const PHI: u64 = 0x9e3779b97f4a7c15;
let mut seed = Self::Seed::default();
for chunk in seed.as_mut().chunks_mut(8) {
let mut s = [0; 4];
for i in s.chunks_exact_mut(2) {
state = state.wrapping_add(PHI);
let mut z = state;
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
z = z ^ (z >> 31);
chunk.copy_from_slice(&z.to_le_bytes());
i[0] = z as u32;
i[1] = (z >> 32) as u32;
}
Self::from_seed(seed)
// By using a non-zero PHI we are guaranteed to generate a non-zero state,
// thus preventing a recursion between from_seed and seed_from_u64.
debug_assert_ne!(s, [0; 4]);
Xoshiro128PlusPlus { s }
}
}

Expand Down Expand Up @@ -113,4 +120,18 @@ mod tests {
assert_eq!(rng.next_u32(), e);
}
}

#[test]
fn stable_seed_from_u64() {
// Value-stability is not guaranteed for SmallRng, but pinning the first ten
// outputs after `seed_from_u64(0)` makes any change to the seeding routine
// (e.g. from an optimization) visible, so stability can be kept whenever
// possible.
let mut rng = Xoshiro128PlusPlus::seed_from_u64(0);
let expected = [
1179900579, 1938959192, 3089844957, 3657088315, 1015453891, 479942911, 3433842246,
669252886, 3985671746, 2737205563,
];
for &e in &expected {
assert_eq!(rng.next_u32(), e);
}
}
}
42 changes: 35 additions & 7 deletions src/rngs/xoshiro256plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,35 @@ impl SeedableRng for Xoshiro256PlusPlus {
/// mapped to a different seed.
#[inline]
fn from_seed(seed: [u8; 32]) -> Xoshiro256PlusPlus {
if seed.iter().all(|&x| x == 0) {
return Self::seed_from_u64(0);
}
let mut state = [0; 4];
read_u64_into(&seed, &mut state);
// Check for zero on aligned integers for better code generation.
// Furthermore, seed_from_u64(0) will expand to a constant when optimized.
if state.iter().all(|&x| x == 0) {
return Self::seed_from_u64(0);
}
Xoshiro256PlusPlus { s: state }
}

/// Create a new `Xoshiro256PlusPlus` from a `u64` seed.
///
/// This uses the SplitMix64 generator internally.
#[inline]
fn seed_from_u64(mut state: u64) -> Self {
const PHI: u64 = 0x9e3779b97f4a7c15;
let mut seed = Self::Seed::default();
for chunk in seed.as_mut().chunks_mut(8) {
let mut s = [0; 4];
for i in s.iter_mut() {
state = state.wrapping_add(PHI);
let mut z = state;
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
z = z ^ (z >> 31);
chunk.copy_from_slice(&z.to_le_bytes());
*i = z;
}
Self::from_seed(seed)
// By using a non-zero PHI we are guaranteed to generate a non-zero state,
// thus preventing a recursion between from_seed and seed_from_u64.
debug_assert_ne!(s, [0; 4]);
Xoshiro256PlusPlus { s }
}
}

Expand Down Expand Up @@ -126,4 +132,26 @@ mod tests {
assert_eq!(rng.next_u64(), e);
}
}

#[test]
fn stable_seed_from_u64() {
// Value-stability is not guaranteed for SmallRng, but pinning the first ten
// outputs after `seed_from_u64(0)` makes any change to the seeding routine
// (e.g. from an optimization) visible, so stability can be kept whenever
// possible.
let mut rng = Xoshiro256PlusPlus::seed_from_u64(0);
let expected = [
5987356902031041503,
7051070477665621255,
6633766593972829180,
211316841551650330,
9136120204379184874,
379361710973160858,
15813423377499357806,
15596884590815070553,
5439680534584881407,
1369371744833522710,
];
for &e in &expected {
assert_eq!(rng.next_u64(), e);
}
}
}