Commit
Implement HighPrecision
sicking committed Jun 25, 2018
1 parent b7b1176 commit 20b0017
Showing 3 changed files with 265 additions and 2 deletions.
3 changes: 3 additions & 0 deletions benches/distributions.rs
@@ -84,6 +84,9 @@ distr_int!(distr_uniform_i128, i128, Uniform::new(-123_456_789_123i128, 123_456_

distr_float!(distr_uniform_f32, f32, Uniform::new(2.26f32, 2.319));
distr_float!(distr_uniform_f64, f64, Uniform::new(2.26f64, 2.319));
distr_float!(distr_highprecision1_f64, f64, HighPrecision::new(2.26f64, 2.319));
distr_float!(distr_highprecision2_f64, f64, HighPrecision::new(-1.0f64 / 3.0, 2.319));
distr_float!(distr_highprecision3_f64, f64, HighPrecision::new(0.001f64, 123_456_789_012_345.987));

// standard
distr_int!(distr_standard_i8, i8, Standard);
262 changes: 261 additions & 1 deletion src/distributions/float.rs
@@ -13,6 +13,7 @@
use core::mem;
use Rng;
use distributions::{Distribution, Standard};
use distributions::uniform::{SampleUniform, UniformSampler};

/// A distribution to sample floating point numbers uniformly in the half-open
/// interval `(0, 1]`, i.e. including 1 but not 0.
Expand Down Expand Up @@ -143,11 +144,175 @@ macro_rules! float_impls {
float_impls! { f32, u32, 23, 127 }
float_impls! { f64, u64, 52, 1023 }

/// A distribution for high-precision sampling of floating point numbers
/// uniformly in a given range. This is similar to `Uniform`, but samples
/// numbers with the full precision of the floating-point type used, at the
/// cost of slower performance.
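///
/// # Example
///
/// ```
/// // Illustrative usage sketch (added; assumes the `HighPrecision` re-export
/// // from `rand::distributions` introduced further down in this commit):
/// use rand::distributions::{Distribution, HighPrecision};
///
/// let dist = HighPrecision::new(0.0f64, 1.0);
/// let mut rng = rand::thread_rng();
/// let x = dist.sample(&mut rng);
/// assert!(0.0 <= x && x < 1.0);
/// ```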
#[derive(Clone, Copy, Debug)]
pub struct HighPrecision {
    low_as_int: i64,
    high_as_int: i64,
    exponent: u16,
    distribution: <i64 as SampleUniform>::Sampler,
}

impl HighPrecision {
    /// Returns a new HighPrecision distribution in the `[low, high)` range.
    pub fn new(low: f64, high: f64) -> Self {
        // Mask of the lowest `bits` bits (all ones when `bits >= 64`).
        fn bitmask(bits: u64) -> u64 {
            if bits >= 64 { -1i64 as u64 } else { (1u64 << bits) - 1u64 }
        }
        // Arithmetic shift right, rounding towards negative infinity and
        // saturating for shifts of 64 or more.
        fn round_neg_inf_shr(val: i64, n: u16) -> i64 {
            if n < 64 {
                val >> n
            } else if val >= 0 {
                0
            } else {
                -1
            }
        }
        // Shift right, rounding towards positive infinity.
        fn round_pos_inf_shr(val: i64, n: u16) -> i64 {
            -round_neg_inf_shr(-val, n)
        }
        // Decompose a finite f64 into (signed mantissa including the implicit
        // leading one, biased exponent with subnormals mapped to 1, and the
        // float's bits as a sign-magnitude integer).
        fn parse(val: f64) -> (i64, u16, i64) {
            let bits = val.to_bits();
            let mut mant = (bits & bitmask(52)) as i64;
            let mut exp = ((bits >> 52) & bitmask(11)) as u16;
            let neg = (bits >> 63) == 1;
            let mut as_int = (bits & bitmask(63)) as i64;
            if exp != 0 {
                mant |= 1i64 << 52;
            } else {
                exp = 1;
            }
            if neg {
                mant *= -1;
                as_int *= -1;
            }
            (mant, exp, as_int)
        }
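        // Worked example (added for clarity): parse(1.5f64), whose bits are
        // 0x3FF8_0000_0000_0000, yields mant = 0x0018_0000_0000_0000 (the
        // implicit leading one set), exp = 1023, and
        // as_int = 0x3FF8_0000_0000_0000.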

        assert!(low.is_finite() && high.is_finite(), "HighPrecision::new called with non-finite limit");
        assert!(low < high, "HighPrecision::new called with low >= high");

        let (mut mant_low, exp_low, low_as_int) = parse(low);
        let (mut mant_high, exp_high, high_as_int) = parse(high);

        // Align both mantissas to the larger exponent, rounding outward so
        // that the represented range still covers all of `[low, high)`.
        let exp;
        if exp_high >= exp_low {
            exp = exp_high;
            mant_low = round_neg_inf_shr(mant_low, exp_high - exp_low);
        } else {
            exp = exp_low;
            mant_high = round_pos_inf_shr(mant_high, exp_low - exp_high);
        }
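        // Worked example (added): low = 1.0 parses to (mant 1 << 52,
        // exp 1023) and high = 4.0 to (mant 1 << 52, exp 1025); mant_low is
        // shifted right by 2 (rounding towards negative infinity) to 1 << 50,
        // so the sampler draws mantissas from [1 << 50, 1 << 52) at exponent
        // 1025, covering [1.0, 4.0).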

        HighPrecision {
            low_as_int,
            high_as_int,
            exponent: exp,
            distribution: <i64 as SampleUniform>::Sampler::new(mant_low, mant_high),
        }
    }
}

impl Distribution<f64> for HighPrecision {
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
        fn bitmask(bits: u64) -> u64 {
            if bits >= 64 { -1i64 as u64 } else { (1u64 << bits) - 1u64 }
        }
        loop {
            let signed_mant = self.distribution.sample(rng);
            // Operate on the absolute value so that we can count bit-sizes
            // correctly.
            let is_neg = signed_mant < 0;
            let mut mantissa = signed_mant.abs() as u64;

            // If the resulting mantissa has too few bits, arithmetically add
            // additional bits from rng. When `signed_mant` represents a
            // negative number, this means subtracting the generated bits.
            const GOAL_ZEROS: u16 = 64 - 53;
            let mut exp = self.exponent;
            let mut zeros = mantissa.leading_zeros() as u16;
            while zeros > GOAL_ZEROS && exp > 1 {
                let additional = ::core::cmp::min(zeros - GOAL_ZEROS, exp - 1);
                let additional_bits = rng.gen::<u64>() >> (64 - additional);
                mantissa <<= additional;
                if !is_neg {
                    mantissa |= additional_bits;
                } else {
                    mantissa -= additional_bits;
                }
                exp -= additional;
                zeros = mantissa.leading_zeros() as u16;
            }
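            // Example of the loop above (added for illustration): if the
            // sampled mantissa has 20 leading zeros and exp > 9, then
            // `additional` = min(20 - 11, exp - 1) = 9, so the mantissa is
            // shifted left by 9 and nine fresh random bits are merged in
            // (OR'd for positive values, subtracted for negative ones).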

            // At this point, if we generate and add more bits, we would just
            // have to round them off again. Since we round towards negative
            // infinity, i.e. the opposite direction of the added bits, we'd
            // just get back to exactly where we are now.

            // There is an edge case if the mantissa is negative
            // 0x0010_0000_0000_0000. While this number already has 53 bits,
            // subtracting further generated bits from it loses one bit at the
            // top, leaving fewer total bits. That means we can fit one extra
            // bit at the end, and if that bit is a zero, rounding will not
            // get us back to the original value.
            if mantissa == (1u64 << 52) && is_neg && exp > 1 && rng.gen::<bool>() {
                mantissa = bitmask(53);
                exp -= 1;
            }
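            // Illustration of the edge case (added): conceptually one extra
            // bit b is drawn at exponent `exp - 1`; b == 1 yields mantissa
            // (1 << 53) - 1 == bitmask(53) at `exp - 1`, while b == 0 keeps
            // 1 << 52 at `exp`, hence the coin flip above.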

            // Handle underflow values.
            if mantissa < (1u64 << 52) {
                debug_assert_eq!(exp, 1);
                exp = 0;
            }
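            // (Added note: with the exponent field forced to 0 the result is
            // encoded as a subnormal; e.g. a remaining mantissa of 1 encodes
            // 5e-324, the smallest positive f64.)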

            // Merge exponent and mantissa into the final result.
            let mut res = (mantissa & bitmask(52)) |
                          ((exp as u64) << 52);
            let mut res_as_int = res as i64;
            if is_neg {
                res_as_int *= -1;
                res |= 1u64 << 63;
            }

            // Check that we're within the requested bounds. These checks can
            // only fail on the side that was shifted and rounded during
            // initial parsing.
            if self.low_as_int <= res_as_int && res_as_int < self.high_as_int {
                return f64::from_bits(res);
            }

            // Assert that we got here due to rounding.
            #[cfg(debug_assertions)]
            {
                let exp_low = (self.low_as_int.abs() as u64 >> 52) & bitmask(11);
                let exp_high = (self.high_as_int.abs() as u64 >> 52) & bitmask(11);
                let mant_low = self.low_as_int.abs() as u64 & bitmask(52);
                let mant_high = self.high_as_int.abs() as u64 & bitmask(52);
                if res_as_int < self.low_as_int {
                    assert!(exp_low < exp_high);
                    assert!(mant_low & bitmask(exp_high - exp_low) != 0);
                }
                if res_as_int >= self.high_as_int {
                    assert!(exp_high < exp_low);
                    assert!(mant_high & bitmask(exp_low - exp_high) != 0);
                }
            }

            // If not, start over. We avoid reusing any of the previous
            // computation, both to avoid introducing bias and to keep things
            // simple, since this path should be rare.
        }
    }
}

#[cfg(test)]
mod tests {
    use Rng;
    use distributions::{Open01, OpenClosed01};
    use super::*;
    use rngs::mock::StepRng;

    const EPSILON32: f32 = ::core::f32::EPSILON;
Expand Down Expand Up @@ -203,4 +368,99 @@ mod tests {
        assert_eq!(max.sample::<f32, _>(Open01), 1.0 - EPSILON32 / 2.0);
        assert_eq!(max.sample::<f64, _>(Open01), 1.0 - EPSILON64 / 2.0);
    }

#[cfg(feature = "alloc")]
#[test]
fn test_highprecision() {
fn to_signed_bits(val: f64) -> i64 {
if val >= 0.0 {
val.to_bits() as i64
} else {
-((-val).to_bits() as i64)
}
}
fn from_signed_bits(val: i64) -> f64 {
if val >= 0 {
f64::from_bits(val as u64)
} else {
-f64::from_bits(-val as u64)
}
}

let mut r = ::test::rng(601);

let mut vals: Vec<f64> =
[0i64,
0x0000_0f00_0000_0000,
0x0001_0000_0000_0000,
0x0004_0000_0000_0000,
0x0008_0000_0000_0000,
0x0010_0000_0000_0000,
0x0020_0000_0000_0000,
0x0040_0000_0000_0000,
0x0100_0000_0000_0000,
0x00cd_ef12_3456_789a,
0x0100_ffff_ffff_ffff,
0x010f_ffff_ffff_ffff,
0x0400_1234_5678_abcd,
0x7fef_ffff_ffff_ffff,
].iter().cloned()
.flat_map(|x| (-2i64..3i64).map(move |y| x + y))
.map(|x| f64::from_bits(x as u64))
.flat_map(|x| vec![x, -x].into_iter())
.filter(|x| x.is_finite())
.collect();
vals.sort_by(|a, b| a.partial_cmp(b).unwrap());
vals.dedup();

for a in vals.iter().cloned() {
for b in vals.iter().cloned().filter(|&b| b > a) {
let hp = HighPrecision::new(a, b);
let a_bits = to_signed_bits(a);
let b_bits = to_signed_bits(b);

// If a and b are "close enough", we can verify the full distribution
if (b_bits.wrapping_sub(a_bits) as u64) < 100 {
let mut counts = Vec::<i32>::with_capacity((b_bits - a_bits) as usize);
counts.resize((b_bits - a_bits) as usize, 0);
for _ in 0..1000 {
let res = hp.sample(&mut r);
counts[(to_signed_bits(res) - a_bits) as usize] += 1;
}
for (count, i) in counts.iter().zip(0i64..) {
let expected = 1000.0f64 *
(from_signed_bits(a_bits + i + 1) -
from_signed_bits(a_bits + i)) / (b - a);
let err = (*count as f64 - expected) / expected;
assert!(err.abs() <= 0.2);
}
} else {
// Rough estimate that the distribution is correct
let step = if (b - a).is_finite() {
(b - a) / 10.0
} else {
b / 10.0 - a / 10.0
};
assert!(step.is_finite());
let mut counts = Vec::<i32>::with_capacity(10);
counts.resize(10, 0);
for _ in 0..3000 {
let res = hp.sample(&mut r);
assert!(a <= res && res < b);
let index = if (res - a).is_finite() {
(res - a) / step
} else {
res / step - a / step
} as usize;
counts[index] += 1;
}
for count in &counts {
let expected = 3000.0f64 / 10.0;
let err = (*count as f64 - expected) / expected;
assert!(err.abs() <= 0.25);
}
}
}
}
}
}
2 changes: 1 addition & 1 deletion src/distributions/mod.rs
@@ -172,7 +172,7 @@ use Rng;

#[doc(inline)] pub use self::other::Alphanumeric;
#[doc(inline)] pub use self::uniform::Uniform;
#[doc(inline)] pub use self::float::{OpenClosed01, Open01};
#[doc(inline)] pub use self::float::{OpenClosed01, Open01, HighPrecision};
#[cfg(feature="std")]
#[doc(inline)] pub use self::gamma::{Gamma, ChiSquared, FisherF, StudentT};
#[cfg(feature="std")]
