Skip to content
This repository has been archived by the owner on Nov 6, 2020. It is now read-only.

Commit

Permalink
remove the redundant hasher in Bloom
Browse files Browse the repository at this point in the history
  • Loading branch information
Hawstein committed Aug 29, 2017
1 parent 36d046f commit c16f165
Showing 1 changed file with 15 additions and 21 deletions.
36 changes: 15 additions & 21 deletions util/bloom/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ use std::hash::{Hash, Hasher};
use std::collections::HashSet;
use siphasher::sip::SipHasher;

// TODO [ToDr] Both hashers are exactly the same - no point to keep two.
const NUMBER_OF_HASHERS: usize = 2;

/// BitVec structure with journalling
/// Every time any of the blocks is set, its index is tracked
/// and can then be drained by the `drain` method
Expand Down Expand Up @@ -80,8 +77,6 @@ pub struct Bloom {
bitmap: BitVecJournal,
bitmap_bits: u64,
k_num: u32,
// TODO [ToDr] Both hashers are exactly the same - no point to keep two.
sips: [SipHasher; NUMBER_OF_HASHERS],
}

impl Bloom {
Expand All @@ -93,12 +88,10 @@ impl Bloom {
let bitmap_bits = (bitmap_size as u64) * 8u64;
let k_num = Bloom::optimal_k_num(bitmap_bits, items_count);
let bitmap = BitVecJournal::new(bitmap_bits as usize);
let sips = [SipHasher::new(), SipHasher::new()];
Bloom {
bitmap: bitmap,
bitmap_bits: bitmap_bits,
k_num: k_num,
sips: sips,
}
}

Expand All @@ -107,12 +100,10 @@ impl Bloom {
let bitmap_size = parts.len() * 8;
let bitmap_bits = (bitmap_size as u64) * 8u64;
let bitmap = BitVecJournal::from_parts(parts);
let sips = [SipHasher::new(), SipHasher::new()];
Bloom {
bitmap: bitmap,
bitmap_bits: bitmap_bits,
k_num: k_num,
sips: sips,
}
}

Expand All @@ -139,9 +130,9 @@ impl Bloom {
pub fn set<T>(&mut self, item: T)
where T: Hash
{
let mut hashes = [0u64, 0u64];
let base_hash = Bloom::sip_hash(&item);
for k_i in 0..self.k_num {
let bit_offset = (self.bloom_hash(&mut hashes, &item, k_i) % self.bitmap_bits) as usize;
let bit_offset = (Bloom::bloom_hash(base_hash, k_i) % self.bitmap_bits) as usize;
self.bitmap.set(bit_offset);
}
}
Expand All @@ -151,9 +142,9 @@ impl Bloom {
pub fn check<T>(&self, item: T) -> bool
where T: Hash
{
let mut hashes = [0u64, 0u64];
let base_hash = Bloom::sip_hash(&item);
for k_i in 0..self.k_num {
let bit_offset = (self.bloom_hash(&mut hashes, &item, k_i) % self.bitmap_bits) as usize;
let bit_offset = (Bloom::bloom_hash(base_hash, k_i) % self.bitmap_bits) as usize;
if !self.bitmap.get(bit_offset) {
return false;
}
Expand All @@ -178,17 +169,20 @@ impl Bloom {
cmp::max(k_num, 1)
}

fn bloom_hash<T>(&self, hashes: &mut [u64; NUMBER_OF_HASHERS], item: &T, k_i: u32) -> u64
fn sip_hash<T>(item: &T) -> u64
where T: Hash
{
if k_i < NUMBER_OF_HASHERS as u32 {
let mut sip = self.sips[k_i as usize].clone();
item.hash(&mut sip);
let hash = sip.finish();
hashes[k_i as usize] = hash;
hash
let mut sip = SipHasher::new();
item.hash(&mut sip);
let hash = sip.finish();
hash
}

fn bloom_hash(base_hash: u64, k_i: u32) -> u64 {
if k_i < 2 {
base_hash
} else {
hashes[0].wrapping_add((k_i as u64).wrapping_mul(hashes[1]) % 0xffffffffffffffc5)
base_hash.wrapping_add((k_i as u64).wrapping_mul(base_hash) % 0xffffffffffffffc5)
}
}

Expand Down

0 comments on commit c16f165

Please sign in to comment.