
Commit

Implementing a cache friendly version of Cuckoo Hash
Summary: This implements a cache-friendly version of Cuckoo Hash in which, in case of a collision, we try to insert the key into the next few consecutive locations (the Cuckoo Block) before moving on to the next hash function. The size of the neighborhood to check is taken as an input parameter in the builder and stored in the table.
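For illustration, here is a standalone sketch of the probing idea; it is not code from this patch, and the function name CuckooBlockInsert and the string hash are made up. On a collision the insert probes the next few consecutive buckets (the cuckoo block) before falling back to the next hash function, so the extra probes usually stay within the same cache lines.

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Sketch only: buckets holds keys directly, an empty string means an empty
// bucket, and the caller has sized the vector with cuckoo_block_size - 1
// slots of padding so the last block never runs off the end.
bool CuckooBlockInsert(std::vector<std::string>& buckets, const std::string& key,
                       uint32_t num_hash_func, uint32_t cuckoo_block_size,
                       uint64_t hash_table_size) {
  for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func; ++hash_cnt) {
    // Mix the hash-function index into a plain string hash (illustrative only).
    uint64_t bucket =
        std::hash<std::string>{}(key + static_cast<char>('A' + hash_cnt)) % hash_table_size;
    for (uint32_t i = 0; i < cuckoo_block_size; ++i, ++bucket) {
      if (buckets[bucket].empty()) {
        buckets[bucket] = key;
        return true;
      }
    }
  }
  return false;  // every block was full; a real builder would displace keys
}

Sizing the bucket vector with cuckoo_block_size - 1 extra slots, as the builder change below does, keeps the final block in range without wrap-around, and a block size of 1 degenerates to the old behavior of checking a single bucket per hash function.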

Test Plan:
make check all
cuckoo_table_{db,reader,builder}_test

Reviewers: sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D22455
adyarshyam committed Aug 28, 2014
1 parent d977e55 commit 7f71448
Showing 9 changed files with 308 additions and 139 deletions.
37 changes: 34 additions & 3 deletions include/rocksdb/table.h
@@ -227,15 +227,46 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
PlainTableOptions());

struct CuckooTablePropertyNames {
// The key that is used to fill empty buckets.
static const std::string kEmptyKey;
// Fixed length of value.
static const std::string kValueLength;
static const std::string kNumHashTable;
static const std::string kMaxNumBuckets;
// Number of hash functions used in Cuckoo Hash.
static const std::string kNumHashFunc;
// It denotes the number of buckets in a Cuckoo Block. Given a key and a
// particular hash function, a Cuckoo Block is a set of consecutive buckets,
// where starting bucket id is given by the hash function on the key. In case
// of a collision during inserting the key, the builder tries to insert the
// key in other locations of the cuckoo block before using the next hash
// function. This reduces cache miss during read operation in case of
// collision.
static const std::string kCuckooBlockSize;
// Size of the hash table. Use this number to compute the modulo of hash
// function. The actual number of buckets will be kMaxHashTableSize +
// kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to
// accommodate the Cuckoo Block from end of hash table, due to cache friendly
// implementation.
static const std::string kHashTableSize;
// Denotes if the key sorted in the file is Internal Key (if false)
// or User Key only (if true).
static const std::string kIsLastLevel;
};

// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
// @hash_table_ratio: Determines the utilization of hash tables. Smaller values
// result in larger hash tables with fewer collisions.
// @max_search_depth: A property used by builder to determine the depth to go to
// to search for a path to displace elements in case of
// collision. See Builder.MakeSpaceForKey method. Higher
// values result in more efficient hash tables with fewer
// lookups but take more time to build.
// @cuckoo_block_size: In case of collision while inserting, the builder
// attempts to insert in the next cuckoo_block_size
// locations before skipping over to the next Cuckoo hash
// function. This makes lookups more cache friendly in case
// of collisions.
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9,
uint32_t max_search_depth = 100);
uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5);

#endif // ROCKSDB_LITE
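
As a usage sketch of the extended signature above (the Options plumbing is standard RocksDB; the parameter values are arbitrary examples, not recommendations):

#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Illustrative only: install the cuckoo table factory with the new
// cuckoo_block_size knob. A block size of 1 keeps the previous behavior;
// larger blocks make collision probes more cache friendly.
rocksdb::Options MakeCuckooOptions() {
  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewCuckooTableFactory(
      0.9 /* hash_table_ratio */, 100 /* max_search_depth */,
      5 /* cuckoo_block_size */));
  return options;
}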

139 changes: 82 additions & 57 deletions table/cuckoo_table_builder.cc
@@ -24,28 +24,31 @@
namespace rocksdb {
const std::string CuckooTablePropertyNames::kEmptyKey =
"rocksdb.cuckoo.bucket.empty.key";
const std::string CuckooTablePropertyNames::kNumHashTable =
const std::string CuckooTablePropertyNames::kNumHashFunc =
"rocksdb.cuckoo.hash.num";
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
"rocksdb.cuckoo.bucket.maxnum";
const std::string CuckooTablePropertyNames::kHashTableSize =
"rocksdb.cuckoo.hash.size";
const std::string CuckooTablePropertyNames::kValueLength =
"rocksdb.cuckoo.value.length";
const std::string CuckooTablePropertyNames::kIsLastLevel =
"rocksdb.cuckoo.file.islastlevel";
const std::string CuckooTablePropertyNames::kCuckooBlockSize =
"rocksdb.cuckoo.hash.cuckooblocksize";

// Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;

CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, double hash_table_ratio,
uint32_t max_num_hash_table, uint32_t max_search_depth,
const Comparator* user_comparator,
const Comparator* user_comparator, uint32_t cuckoo_block_size,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
: num_hash_table_(2),
: num_hash_func_(2),
file_(file),
hash_table_ratio_(hash_table_ratio),
max_num_hash_table_(max_num_hash_table),
max_num_hash_func_(max_num_hash_table),
max_search_depth_(max_search_depth),
cuckoo_block_size_(std::max(1U, cuckoo_block_size)),
is_last_level_file_(false),
has_seen_first_key_(false),
ucomp_(user_comparator),
Expand Down Expand Up @@ -101,48 +104,58 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
}

Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
uint64_t num_buckets = kvs_.size() / hash_table_ratio_;
buckets->resize(num_buckets);
uint64_t hash_table_size = kvs_.size() / hash_table_ratio_;
buckets->resize(hash_table_size + cuckoo_block_size_ - 1);
uint64_t make_space_for_key_call_id = 0;
for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
uint64_t bucket_id;
bool bucket_found = false;
autovector<uint64_t> hash_vals;
Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
ExtractUserKey(kvs_[vector_idx].first);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets);
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_id = hash_val;
bucket_found = true;
break;
} else {
if (ucomp_->Compare(user_key, is_last_level_file_
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
: ExtractUserKey(
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
return Status::NotSupported("Same key is being inserted again.");
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size);
// If there is a collision, check next cuckoo_block_size_ locations for
// empty locations. While checking, if we reach end of the hash table,
// stop searching and proceed for next hash function.
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
++block_idx, ++hash_val) {
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_id = hash_val;
bucket_found = true;
break;
} else {
if (ucomp_->Compare(user_key, is_last_level_file_
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
: ExtractUserKey(
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
return Status::NotSupported("Same key is being inserted again.");
}
hash_vals.push_back(hash_val);
}
hash_vals.push_back(hash_val);
}
}
while (!bucket_found && !MakeSpaceForKey(hash_vals,
++make_space_for_key_call_id, buckets, &bucket_id)) {
hash_table_size, ++make_space_for_key_call_id, buckets, &bucket_id)) {
// Rehash by increasing number of hash tables.
if (num_hash_table_ >= max_num_hash_table_) {
return Status::NotSupported("Too many collissions. Unable to hash.");
if (num_hash_func_ >= max_num_hash_func_) {
return Status::NotSupported("Too many collisions. Unable to hash.");
}
// We don't really need to rehash the entire table because old hashes are
// still valid and we only increased the number of hash functions.
uint64_t hash_val = get_slice_hash_(user_key,
num_hash_table_, num_buckets);
++num_hash_table_;
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
num_hash_func_, hash_table_size);
++num_hash_func_;
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
++block_idx, ++hash_val) {
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
}
}
}
(*buckets)[bucket_id].vector_idx = vector_idx;
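
To make the sizing above concrete, a small worked example (the numbers are illustrative, not from this patch): hash values are reduced modulo hash_table_size, while the bucket vector carries cuckoo_block_size_ - 1 extra slots so a cuckoo block that starts at the last addressable bucket still fits.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Illustrative numbers only.
  const uint64_t num_keys = 50;
  const double hash_table_ratio = 0.5;
  const uint32_t cuckoo_block_size = 5;

  const uint64_t hash_table_size = num_keys / hash_table_ratio;            // 100 addressable buckets
  std::vector<uint32_t> buckets(hash_table_size + cuckoo_block_size - 1);  // 104 slots in total

  // Worst case: a hash value of 99 starts a block covering buckets 99..103.
  assert(hash_table_size - 1 + cuckoo_block_size - 1 < buckets.size());
  return 0;
}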
@@ -226,16 +239,22 @@ Status CuckooTableBuilder::Finish() {
properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
properties_.user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].assign(
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
uint64_t num_buckets = buckets.size();
CuckooTablePropertyNames::kNumHashFunc].assign(
reinterpret_cast<char*>(&num_hash_func_), sizeof(num_hash_func_));

uint64_t hash_table_size = buckets.size() - cuckoo_block_size_ + 1;
properties_.user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].assign(
reinterpret_cast<const char*>(&num_buckets), sizeof(num_buckets));
CuckooTablePropertyNames::kHashTableSize].assign(
reinterpret_cast<const char*>(&hash_table_size),
sizeof(hash_table_size));
properties_.user_collected_properties[
CuckooTablePropertyNames::kIsLastLevel].assign(
reinterpret_cast<const char*>(&is_last_level_file_),
sizeof(is_last_level_file_));
properties_.user_collected_properties[
CuckooTablePropertyNames::kCuckooBlockSize].assign(
reinterpret_cast<const char*>(&cuckoo_block_size_),
sizeof(cuckoo_block_size_));

// Write meta blocks.
MetaIndexBuilder meta_index_builder;
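
The properties written above store the raw bytes of the in-memory integers. Hypothetically, a reader on the same platform could recover them with a helper like this (DecodeUint32Property is illustrative, not part of the patch):

#include <cstdint>
#include <cstring>
#include <string>

// Illustrative helper: the builder assign()s the raw bytes of a uint32_t,
// so the value can be copied straight back out of the property string,
// e.g. the value stored under CuckooTablePropertyNames::kCuckooBlockSize.
uint32_t DecodeUint32Property(const std::string& raw) {
  uint32_t value = 0;
  if (raw.size() >= sizeof(value)) {
    std::memcpy(&value, raw.data(), sizeof(value));
  }
  return value;
}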
@@ -307,6 +326,7 @@ uint64_t CuckooTableBuilder::FileSize() const {
// If tree depth exceeds max depth, we return false indicating failure.
bool CuckooTableBuilder::MakeSpaceForKey(
const autovector<uint64_t>& hash_vals,
const uint64_t hash_table_size,
const uint64_t make_space_for_key_call_id,
std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id) {
@@ -322,12 +342,13 @@ bool CuckooTableBuilder::MakeSpaceForKey(
std::vector<CuckooNode> tree;
// We want to identify already visited buckets in the current method call so
// that we don't add same buckets again for exploration in the tree.
// We do this by maintaining a count of current method call, which acts as a
// unique id for this invocation of the method. We store this number into
// the nodes that we explore in current method call.
// We do this by maintaining a count of current method call in
// make_space_for_key_call_id, which acts as a unique id for this invocation
// of the method. We store this number into the nodes that we explore in
// current method call.
// It is unlikely for the increment operation to overflow because the maximum
// no. of times this will be called is <= max_num_hash_table_ + kvs_.size().
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
// no. of times this will be called is <= max_num_hash_func_ + kvs_.size().
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
uint64_t bucket_id = hash_vals[hash_cnt];
(*buckets)[bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
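
The make_space_for_key_call_id trick in the comment above avoids clearing a visited flag on every bucket before each search. A minimal standalone sketch of the pattern (names are made up):

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch only: each bucket remembers the id of the last search that touched
// it. A bucket counts as visited only when its stored id equals the current
// search id, so starting a new search is just an increment instead of an
// O(n) reset. Call BeginSearch() before the first MarkVisited().
struct VisitedMarker {
  std::vector<uint64_t> last_call_id;
  uint64_t current_call_id = 0;

  explicit VisitedMarker(std::size_t num_buckets) : last_call_id(num_buckets, 0) {}

  void BeginSearch() { ++current_call_id; }

  // Returns true exactly once per bucket within a single search.
  bool MarkVisited(std::size_t bucket) {
    if (last_call_id[bucket] == current_call_id) {
      return false;
    }
    last_call_id[bucket] = current_call_id;
    return true;
  }
};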
@@ -342,22 +363,26 @@ bool CuckooTableBuilder::MakeSpaceForKey(
break;
}
CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id];
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
for (uint32_t hash_cnt = 0;
hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) {
uint64_t child_bucket_id = get_slice_hash_(
is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first
: ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)),
hash_cnt, buckets->size());
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id) {
continue;
}
(*buckets)[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
curr_pos));
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
null_found = true;
break;
hash_cnt, hash_table_size);
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
++block_idx, ++child_bucket_id) {
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id) {
continue;
}
(*buckets)[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
curr_pos));
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
null_found = true;
break;
}
}
}
++curr_pos;
@@ -367,10 +392,10 @@ bool CuckooTableBuilder::MakeSpaceForKey(
// There is an empty node in tree.back(). Now, traverse the path from this
// empty node to top of the tree and at every node in the path, replace
// child with the parent. Stop when first level is reached in the tree
// (happens when 0 <= bucket_to_replace_pos < num_hash_table_) and return
// (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return
// this location in first level for target key to be inserted.
uint32_t bucket_to_replace_pos = tree.size()-1;
while (bucket_to_replace_pos >= num_hash_table_) {
while (bucket_to_replace_pos >= num_hash_func_) {
CuckooNode& curr_node = tree[bucket_to_replace_pos];
(*buckets)[curr_node.bucket_id] =
(*buckets)[tree[curr_node.parent_pos].bucket_id];
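For readers following MakeSpaceForKey above, here is a heavily simplified standalone model of the displacement search: a breadth-first search over buckets where the children of a bucket are the alternative locations of its current occupant, followed by shifting occupants along the found path so one of the new key's own buckets becomes free. It uses ints as keys and omits the cuckoo block and the visited-id optimization; it is not the builder's code.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Simplified model: keys are ints, -1 marks an empty bucket, and
// hash(key, i) gives the bucket of key under the i-th hash function.
struct Node {
  uint64_t bucket_id;
  int parent;      // index into the tree vector, -1 for a root candidate
  uint32_t depth;
};

bool MakeSpace(std::vector<int>& buckets,
               const std::vector<uint64_t>& root_buckets,  // the new key's (full) buckets
               uint32_t num_hash_func, uint32_t max_depth,
               const std::function<uint64_t(int, uint32_t)>& hash,
               uint64_t* freed_bucket) {
  std::vector<Node> tree;
  for (uint64_t b : root_buckets) {
    tree.push_back({b, -1, 0});
  }
  std::size_t pos = 0;
  int empty_pos = -1;
  while (pos < tree.size() && empty_pos < 0) {
    const Node curr = tree[pos];  // copy: push_back below may reallocate
    if (curr.depth >= max_depth) {
      break;  // give up; the caller would add another hash function instead
    }
    const int occupant = buckets[curr.bucket_id];
    for (uint32_t i = 0; i < num_hash_func && empty_pos < 0; ++i) {
      const uint64_t child = hash(occupant, i);
      tree.push_back({child, static_cast<int>(pos), curr.depth + 1});
      if (buckets[child] == -1) {
        empty_pos = static_cast<int>(tree.size()) - 1;
      }
    }
    ++pos;
  }
  if (empty_pos < 0) {
    return false;
  }
  // Walk from the empty bucket back to a root, moving each parent's occupant
  // into its child; the root bucket at the end of the walk becomes free.
  int cur = empty_pos;
  while (tree[cur].parent != -1) {
    buckets[tree[cur].bucket_id] = buckets[tree[tree[cur].parent].bucket_id];
    cur = tree[cur].parent;
  }
  *freed_bucket = tree[cur].bucket_id;
  buckets[*freed_bucket] = -1;  // the new key can now go here
  return true;
}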
9 changes: 6 additions & 3 deletions table/cuckoo_table_builder.h
@@ -23,6 +23,7 @@ class CuckooTableBuilder: public TableBuilder {
CuckooTableBuilder(
WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
uint32_t max_search_depth, const Comparator* user_comparator,
uint32_t cuckoo_block_size,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));

// REQUIRES: Either Finish() or Abandon() has been called.
@@ -60,24 +61,26 @@ class CuckooTableBuilder: public TableBuilder {
CuckooBucket()
: vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
uint32_t vector_idx;
// This number will not exceed kvs_.size() + max_num_hash_table_.
// This number will not exceed kvs_.size() + max_num_hash_func_.
// We assume number of items is <= 2^32.
uint32_t make_space_for_key_call_id;
};
static const uint32_t kMaxVectorIdx = std::numeric_limits<int32_t>::max();

bool MakeSpaceForKey(
const autovector<uint64_t>& hash_vals,
const uint64_t hash_table_size,
const uint64_t call_id,
std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id);
Status MakeHashTable(std::vector<CuckooBucket>* buckets);

uint32_t num_hash_table_;
uint32_t num_hash_func_;
WritableFile* file_;
const double hash_table_ratio_;
const uint32_t max_num_hash_table_;
const uint32_t max_num_hash_func_;
const uint32_t max_search_depth_;
const uint32_t cuckoo_block_size_;
bool is_last_level_file_;
Status status_;
std::vector<std::pair<std::string, std::string>> kvs_;
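The builder receives its hash through the raw function pointer uint64_t (*)(const Slice&, uint32_t, uint64_t) shown above; the second argument selects which hash function is wanted, and the third is the modulus, which after this change is hash_table_size rather than the padded bucket count. A hypothetical callback with that shape, using FNV-1a purely for illustration:

#include <cstddef>
#include <cstdint>

#include "rocksdb/slice.h"

// Hypothetical callback matching the builder's hash function-pointer type.
// 'index' selects which hash function is being evaluated; 'modulus' is the
// value the result must stay under. FNV-1a is used only for illustration;
// it is not the cuckoo table's real hash.
uint64_t ExampleSliceHash(const rocksdb::Slice& key, uint32_t index,
                          uint64_t modulus) {
  uint64_t h = 14695981039346656037ull ^ (index * 1099511628211ull);
  for (std::size_t i = 0; i < key.size(); ++i) {
    h = (h ^ static_cast<unsigned char>(key.data()[i])) * 1099511628211ull;
  }
  return h % modulus;
}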
