diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fb5f24..b45041c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Unreleased +- Rename `StableHasherResult` to `FromStableHash` (#8) +- Use new-type for returned-hash of `SipHasher128`(`Hash`) (#8) +- Introduce multi hasher support (#8) - `StableHasher::finish` now returns a small hash instead of being fatal (#6) - Remove `StableHasher::finalize` (#4) - Import stable hasher implementation from rustc ([db8aca48129](https://github.com/rust-lang/rust/blob/db8aca48129d86b2623e3ac8cbcf2902d4d313ad/compiler/rustc_data_structures/src/)) diff --git a/src/lib.rs b/src/lib.rs index 88b5bbe..6fc9e16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,8 +6,27 @@ mod int_overflow; mod sip128; mod stable_hasher; +/// Hashers collection +pub mod hashers { + #[doc(inline)] + pub use super::sip128::{SipHasher128, SipHasher128Hash}; + + /// Stable 128-bits Sip Hasher + /// + /// [`StableHasher`] version of [`SipHasher128`]. + /// + /// [`StableHasher`]: super::StableHasher + pub type StableSipHasher128 = super::StableHasher; +} + #[doc(inline)] pub use stable_hasher::StableHasher; #[doc(inline)] -pub use stable_hasher::StableHasherResult; +pub use stable_hasher::FromStableHash; + +#[doc(inline)] +pub use stable_hasher::ExtendedHasher; + +#[doc(inline)] +pub use hashers::{SipHasher128Hash, StableSipHasher128}; diff --git a/src/sip128.rs b/src/sip128.rs index 66a58ee..e29b21e 100644 --- a/src/sip128.rs +++ b/src/sip128.rs @@ -3,6 +3,7 @@ // This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. // See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 use crate::int_overflow::{DebugStrictAdd, DebugStrictSub}; +use crate::ExtendedHasher; use std::hash::Hasher; use std::mem::{self, MaybeUninit}; @@ -40,6 +41,10 @@ const BUFFER_WITH_SPILL_SIZE: usize = BUFFER_WITH_SPILL_CAPACITY * ELEM_SIZE; // Index of the spill element in the buffer. 
const BUFFER_SPILL_INDEX: usize = BUFFER_WITH_SPILL_CAPACITY - 1; +/// Hashing result of [`SipHasher128`] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SipHasher128Hash(pub [u64; 2]); + #[derive(Debug, Clone)] #[repr(C)] pub struct SipHasher128 { @@ -214,28 +219,6 @@ impl SipHasher128 { hasher } - #[inline] - pub fn short_write(&mut self, bytes: [u8; LEN]) { - let nbuf = self.nbuf; - debug_assert!(LEN <= 8); - debug_assert!(nbuf < BUFFER_SIZE); - debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE); - - if nbuf.debug_strict_add(LEN) < BUFFER_SIZE { - unsafe { - // The memcpy call is optimized away because the size is known. - let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); - ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN); - } - - self.nbuf = nbuf.debug_strict_add(LEN); - - return; - } - - unsafe { self.short_write_process_buffer(bytes) } - } - // A specialized write function for values with size <= 8 that should only // be called when the write would cause the buffer to fill. // @@ -378,13 +361,11 @@ impl SipHasher128 { } } - #[inline(always)] - pub fn finish128(mut self) -> [u64; 2] { - SipHasher128::finish128_inner(self.nbuf, &mut self.buf, self.state, self.processed) - } - + // A function for finishing the hashing. + // + // SAFETY: `buf` must be initialized up to the byte offset `nbuf`. 
#[inline] - fn finish128_inner( + unsafe fn finish128_inner( nbuf: usize, buf: &mut [MaybeUninit; BUFFER_WITH_SPILL_CAPACITY], mut state: State, @@ -437,6 +418,45 @@ impl SipHasher128 { } } +impl Default for SipHasher128 { + fn default() -> SipHasher128 { + SipHasher128::new_with_keys(0, 0) + } +} + +impl ExtendedHasher for SipHasher128 { + type Hash = SipHasher128Hash; + + #[inline] + fn short_write(&mut self, bytes: [u8; LEN]) { + let nbuf = self.nbuf; + debug_assert!(LEN <= 8); + debug_assert!(nbuf < BUFFER_SIZE); + debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE); + + if nbuf.debug_strict_add(LEN) < BUFFER_SIZE { + unsafe { + // The memcpy call is optimized away because the size is known. + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN); + } + + self.nbuf = nbuf.debug_strict_add(LEN); + + return; + } + + unsafe { self.short_write_process_buffer(bytes) } + } + + #[inline(always)] + fn finish(mut self) -> SipHasher128Hash { + SipHasher128Hash(unsafe { + SipHasher128::finish128_inner(self.nbuf, &mut self.buf, self.state, self.processed) + }) + } +} + impl Hasher for SipHasher128 { #[inline] fn write_u8(&mut self, i: u8) { @@ -504,7 +524,9 @@ impl Hasher for SipHasher128 { fn finish(&self) -> u64 { let mut buf = self.buf.clone(); - let [a, b] = SipHasher128::finish128_inner(self.nbuf, &mut buf, self.state, self.processed); + let [a, b] = unsafe { + SipHasher128::finish128_inner(self.nbuf, &mut buf, self.state, self.processed) + }; // Combining the two halves makes sure we get a good quality hash. 
a.wrapping_mul(3).wrapping_add(b).to_le() diff --git a/src/sip128/tests.rs b/src/sip128/tests.rs index d9c7edb..56be6d0 100644 --- a/src/sip128/tests.rs +++ b/src/sip128/tests.rs @@ -14,12 +14,12 @@ impl<'a> Hash for Bytes<'a> { } } -fn hash_with(mut st: SipHasher128, x: &T) -> [u64; 2] { +fn hash_with(mut st: SipHasher128, x: &T) -> SipHasher128Hash { x.hash(&mut st); - st.finish128() + st.finish() } -fn hash(x: &T) -> [u64; 2] { +fn hash(x: &T) -> SipHasher128Hash { hash_with(SipHasher128::new_with_keys(0, 0), x) } @@ -119,7 +119,7 @@ fn test_siphash_1_3_test_vector() { | ((TEST_VECTOR[i][15] as u64) << 56), ]; - assert_eq!(out, expected); + assert_eq!(out.0, expected); input.push(i as u8); } } @@ -253,8 +253,8 @@ fn test_short_write_works() { h2.write(&test_i128.to_ne_bytes()); h2.write(&test_isize.to_ne_bytes()); - let h1_hash = h1.finish128(); - let h2_hash = h2.finish128(); + let h1_hash = h1.finish(); + let h2_hash = h2.finish(); assert_eq!(h1_hash, h2_hash); } @@ -279,8 +279,8 @@ macro_rules! test_fill_buffer { h2.write(s); h2.write(x_bytes); - let h1_hash = h1.finish128(); - let h2_hash = h2.finish128(); + let h1_hash = h1.finish(); + let h2_hash = h2.finish(); assert_eq!(h1_hash, h2_hash); } @@ -306,10 +306,14 @@ fn test_fill_buffer() { #[test] fn test_finish() { + fn hash(h: &H) -> u64 { + h.finish() + } + let mut hasher = SipHasher128::new_with_keys(0, 0); hasher.write_isize(0xF0); hasher.write_isize(0xF0010); - assert_eq!(hasher.finish(), hasher.finish()); + assert_eq!(hash(&hasher), hash(&hasher)); } diff --git a/src/stable_hasher.rs b/src/stable_hasher.rs index 8239840..7c827d5 100644 --- a/src/stable_hasher.rs +++ b/src/stable_hasher.rs @@ -1,13 +1,61 @@ //! Stable hasher adapted for cross-platform independent hash. -use crate::sip128::SipHasher128; - use std::fmt; use std::hash::Hasher; #[cfg(test)] mod tests; +/// Extended [`Hasher`] trait for use with [`StableHasher`]. 
+/// +/// It permits returning an arbitrary type as the [`Self::Hash`] type +/// contrary to the [`Hasher`] trait which can only return `u64`. This +/// is useful when the hasher uses a different representation. +/// +/// # Example +/// +/// ``` +/// use std::hash::Hasher; +/// use rustc_stable_hash::ExtendedHasher; +/// +/// struct BogusHasher(u128); +/// +/// impl Hasher for BogusHasher { +/// fn write(&mut self, a: &[u8]) { +/// # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0; +/// // ... +/// } +/// +/// fn finish(&self) -> u64 { +/// self.0 as u64 // really bogus +/// } +/// } +/// +/// impl ExtendedHasher for BogusHasher { +/// type Hash = u128; +/// +/// fn short_write(&mut self, bytes: [u8; LEN]) { +/// self.write(&bytes) +/// } +/// +/// fn finish(self) -> Self::Hash { +/// self.0 +/// } +/// } +/// ``` +pub trait ExtendedHasher: Hasher { + /// Type returned by the hasher. + type Hash; + + /// Optimized version of [`Hasher::write`] for small writes. + fn short_write(&mut self, bytes: [u8; LEN]) { + self.write(&bytes); + } + + /// Finalization method of the hasher to return the [`Self::Hash`]. + fn finish(self) -> Self::Hash; +} + /// A Stable Hasher adapted for cross-platform independent hash. 
/// /// When hashing something that ends up affecting properties like symbol names, @@ -21,87 +69,120 @@ mod tests; /// # Example /// /// ``` -/// use rustc_stable_hash::{StableHasher, StableHasherResult}; +/// use rustc_stable_hash::hashers::{StableSipHasher128, SipHasher128Hash}; +/// use rustc_stable_hash::{StableHasher, FromStableHash}; /// use std::hash::Hasher; /// /// struct Hash128([u64; 2]); -/// impl StableHasherResult for Hash128 { -/// fn finish(hash: [u64; 2]) -> Hash128 { +/// impl FromStableHash for Hash128 { +/// type Hash = SipHasher128Hash; +/// +/// fn from(SipHasher128Hash(hash): SipHasher128Hash) -> Hash128 { /// Hash128(hash) /// } /// } /// -/// let mut hasher = StableHasher::new(); +/// let mut hasher = StableSipHasher128::new(); /// hasher.write_usize(0xFA); /// /// let hash: Hash128 = hasher.finish(); /// ``` #[must_use] -pub struct StableHasher { - state: SipHasher128, +pub struct StableHasher { + state: H, } -/// Trait for retrieving the result of the stable hashing operation. +/// Trait for processing the result of the stable hashing operation. /// /// # Example /// /// ``` -/// use rustc_stable_hash::{StableHasher, StableHasherResult}; +/// use rustc_stable_hash::{StableHasher, FromStableHash}; /// /// struct Hash128(u128); /// -/// impl StableHasherResult for Hash128 { -/// fn finish(hash: [u64; 2]) -> Hash128 { +/// impl FromStableHash for Hash128 { +/// type Hash = [u64; 2]; +/// +/// fn from(hash: [u64; 2]) -> Hash128 { /// let upper: u128 = hash[0] as u128; /// let lower: u128 = hash[1] as u128; -/// +/// /// Hash128((upper << 64) | lower) /// } /// } /// ``` -pub trait StableHasherResult: Sized { - /// Retrieving the finalized state of the [`StableHasher`] and construct - /// an [`Self`] containing the hash. - fn finish(hasher: [u64; 2]) -> Self; +pub trait FromStableHash: Sized { + type Hash; + + /// Convert the finalized state of a [`StableHasher`] and construct + /// an [`Self`] containing the processed hash. 
+ fn from(hash: Self::Hash) -> Self; } -impl StableHasher { +impl StableHasher { /// Creates a new [`StableHasher`]. /// /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`]. #[inline] #[must_use] pub fn new() -> Self { + Default::default() + } +} + +impl Default for StableHasher { + /// Creates a new [`StableHasher`]. + /// + /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`]. + #[inline] + #[must_use] + fn default() -> Self { StableHasher { - state: SipHasher128::new_with_keys(0, 0), + state: Default::default(), } } +} + +impl StableHasher { + /// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`]. + /// + /// Useful when wanting to initialize a hasher with different parameters/keys. + /// + /// **Important**: Any use of the hasher before being given to a [`StableHasher`] + /// is not covered by this crate's guarantees and will make the resulting hash + /// NOT platform independent. + #[inline] + #[must_use] + pub fn with_hasher(state: H) -> Self { + StableHasher { state } + } /// Returns the typed-hash value for the values written. /// /// The resulting typed-hash value is constructed from an - /// [`StableHasherResult`] implemenation. + /// [`FromStableHash`] implementation. /// /// To be used in-place of [`Hasher::finish`]. #[inline] #[must_use] - pub fn finish(self) -> W { - W::finish(self.state.finish128()) + pub fn finish>(self) -> W { + W::from(self.state.finish()) } } -impl fmt::Debug for StableHasher { +impl fmt::Debug for StableHasher { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self.state) } } -impl Hasher for StableHasher { +impl Hasher for StableHasher { /// Returns a combined hash. /// /// For greater precision use instead [`StableHasher::finish`]. 
fn finish(&self) -> u64 { - self.state.finish() + Hasher::finish(&self.state) } #[inline] @@ -192,7 +273,7 @@ impl Hasher for StableHasher { // Cold path #[cold] #[inline(never)] - fn hash_value(state: &mut SipHasher128, value: u64) { + fn hash_value(state: &mut H, value: u64) { state.write_u8(0xFF); state.short_write(value.to_le_bytes()); } diff --git a/src/stable_hasher/tests.rs b/src/stable_hasher/tests.rs index e6560db..3eec9c9 100644 --- a/src/stable_hasher/tests.rs +++ b/src/stable_hasher/tests.rs @@ -1,6 +1,7 @@ use std::hash::Hash; use super::*; +use crate::{SipHasher128Hash, StableSipHasher128}; // The tests below compare the computed hashes to particular expected values // in order to test that we produce the same results on different platforms, @@ -12,8 +13,10 @@ use super::*; #[derive(Debug, PartialEq)] struct TestHash([u64; 2]); -impl StableHasherResult for TestHash { - fn finish(hash: [u64; 2]) -> TestHash { +impl FromStableHash for TestHash { + type Hash = SipHasher128Hash; + + fn from(SipHasher128Hash(hash): Self::Hash) -> TestHash { TestHash(hash) } } @@ -35,7 +38,7 @@ fn test_hash_integers() { let test_i128 = -500_i128; let test_isize = -600_isize; - let mut h = StableHasher::new(); + let mut h = StableSipHasher128::new(); test_u8.hash(&mut h); test_u16.hash(&mut h); test_u32.hash(&mut h); @@ -60,7 +63,7 @@ fn test_hash_usize() { // Test that usize specifically is handled consistently across platforms. let test_usize = 0xABCDEF01_usize; - let mut h = StableHasher::new(); + let mut h = StableSipHasher128::new(); test_usize.hash(&mut h); // This depends on the hashing algorithm. See note at top of file. @@ -74,7 +77,7 @@ fn test_hash_isize() { // Test that isize specifically is handled consistently across platforms. let test_isize = -7_isize; - let mut h = StableHasher::new(); + let mut h = StableSipHasher128::new(); test_isize.hash(&mut h); // This depends on the hashing algorithm. See note at top of file. 
@@ -84,7 +87,7 @@ fn test_hash_isize() { } fn hash(t: &T) -> TestHash { - let mut h = StableHasher::new(); + let mut h = StableSipHasher128::new(); t.hash(&mut h); h.finish() }