Skip to content

Commit

Permalink
[Benchmark] Support new eviction strategy.
Browse files Browse the repository at this point in the history
  • Loading branch information
rhdong committed Jul 25, 2023
1 parent 0e69b7e commit 7719b3a
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 23 deletions.
13 changes: 7 additions & 6 deletions benchmark/benchmark_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ inline uint64_t getTimestamp() {

// Fills h_keys with key_num_per_op consecutive keys beginning at `start` and,
// when h_scores is non-null, assigns each key the score `key % freq_range`.
//
//   h_keys          output array with room for key_num_per_op keys.
//   h_scores        optional output array of the same length; pass nullptr to
//                   skip score generation.
//   key_num_per_op  number of keys to generate; values <= 0 generate nothing.
//   start           first key of the run.
//   freq_range      modulus applied to each key to obtain its score; values
//                   below 1 are treated as 1 (every score becomes 0).
template <class K, class S>
void create_continuous_keys(K* h_keys, S* h_scores, const int key_num_per_op,
                            const K start = 0, int freq_range = 1000) {
  // Clamp once up front: `% 0` is undefined behaviour, and a negative modulus
  // would wrap to a huge value if K is an unsigned type.
  const int modulus = freq_range < 1 ? 1 : freq_range;
  // The index is an int so it is compared with the bound in the bound's own
  // type; an unsigned index would turn a negative key_num_per_op into a very
  // large trip count and write past the end of the arrays.
  for (int i = 0; i < key_num_per_op; i++) {
    const K key = start + static_cast<K>(i);
    h_keys[i] = key;
    if (h_scores != nullptr) h_scores[i] = static_cast<S>(key % modulus);
  }
}

Expand All @@ -135,7 +135,8 @@ template <typename K, typename S>
void create_keys_for_hitrate(K* h_keys, S* h_scores, const int key_num_per_op,
const float hitrate = 0.6f,
const Hit_Mode hit_mode = Hit_Mode::last_insert,
const K end = 0, const bool reset = false) {
const K end = 0, const bool reset = false,
int freq_range = 1000) {
int divide = static_cast<int>(key_num_per_op * hitrate);
if (Hit_Mode::random == hit_mode) {
std::random_device rd;
Expand All @@ -154,13 +155,13 @@ void create_keys_for_hitrate(K* h_keys, S* h_scores, const int key_num_per_op,
int i = 0;
for (auto existed_value : numbers) {
h_keys[i] = existed_value;
if (h_scores != nullptr) h_scores[i] = getTimestamp();
if (h_scores != nullptr) h_scores[i] = h_keys[i] % freq_range;
i++;
}
} else {
// else keep its original value, but update scores
for (int i = 0; i < divide; i++) {
if (h_scores != nullptr) h_scores[i] = getTimestamp();
if (h_scores != nullptr) h_scores[i] = getTimestamp() % freq_range;
}
}

Expand All @@ -170,7 +171,7 @@ void create_keys_for_hitrate(K* h_keys, S* h_scores, const int key_num_per_op,
}
for (int i = divide; i < key_num_per_op; i++) {
h_keys[i] = new_value--;
if (h_scores != nullptr) h_scores[i] = getTimestamp();
if (h_scores != nullptr) h_scores[i] = getTimestamp() % freq_range;
}
}

Expand Down
54 changes: 37 additions & 17 deletions benchmark/merlin_hashtable_benchmark.cc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,13 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,

CUDA_CHECK(cudaMemset(h_vectors, 0, key_num_per_op * sizeof(V) * dim));

bool need_scores = (Table::evict_strategy == EvictStrategy::kLfu ||
Table::evict_strategy == EvictStrategy::kEpochLfu ||
Table::evict_strategy == EvictStrategy::kCustomized);

K* d_keys;
S* d_scores = nullptr;
S* d_scores_real;
S* d_scores;
V* d_vectors;
V* d_def_val;
V** d_vectors_ptr;
Expand All @@ -85,6 +90,7 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
S* d_evict_scores;

CUDA_CHECK(cudaMalloc(&d_keys, key_num_per_op * sizeof(K)));
CUDA_CHECK(cudaMalloc(&d_scores_real, key_num_per_op * sizeof(S)));
CUDA_CHECK(cudaMalloc(&d_vectors, key_num_per_op * sizeof(V) * dim));
CUDA_CHECK(cudaMalloc(&d_def_val, key_num_per_op * sizeof(V) * dim));
CUDA_CHECK(cudaMalloc(&d_vectors_ptr, key_num_per_op * sizeof(V*)));
Expand All @@ -99,6 +105,8 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
CUDA_CHECK(cudaMemset(d_vectors_ptr, 0, key_num_per_op * sizeof(V*)));
CUDA_CHECK(cudaMemset(d_found, 0, key_num_per_op * sizeof(bool)));

d_scores = need_scores ? d_scores_real : nullptr;

cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));

Expand All @@ -114,12 +122,16 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
K start = 0UL;

S threshold = benchmark::host_nano<S>();
for (int i = 0; i < loop_num_init; i++) {
int global_epoch = 0;
for (; global_epoch < loop_num_init; global_epoch++) {
uint64_t key_num_cur_insert =
i == loop_num_init - 1 ? key_num_remain : key_num_per_op;
global_epoch == loop_num_init - 1 ? key_num_remain : key_num_per_op;
create_continuous_keys<K, S>(h_keys, h_scores, key_num_cur_insert, start);
CUDA_CHECK(cudaMemcpy(d_keys, h_keys, key_num_cur_insert * sizeof(K),
cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(d_scores_real, h_scores,
key_num_cur_insert * sizeof(S),
cudaMemcpyHostToDevice));
table->find_or_insert(key_num_cur_insert, d_keys, d_vectors_ptr, d_found,
d_scores, stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
Expand All @@ -139,8 +151,10 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
create_continuous_keys<K, S>(h_keys, h_scores, key_num_append, start);
CUDA_CHECK(cudaMemcpy(d_keys, h_keys, key_num_append * sizeof(K),
cudaMemcpyHostToDevice));
table->insert_or_assign(key_num_append, d_keys, d_vectors, d_scores,
stream);
CUDA_CHECK(cudaMemcpy(d_scores_real, h_scores, key_num_append * sizeof(S),
cudaMemcpyHostToDevice));
table->insert_or_assign(key_num_append, d_keys, d_vectors, d_scores, stream,
global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
start += key_num_append;
real_load_factor = table->load_factor(stream);
Expand All @@ -149,7 +163,7 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,

// For trigger the kernel selection in advance.
int key_num_per_op_warmup = 1;
for (int i = 0; i < 9; i++) {
for (int i = 0; i < 9; i++, global_epoch++) {
switch (api) {
case API_Select::find: {
table->find(key_num_per_op_warmup, d_keys, d_vectors, d_found, d_scores,
Expand All @@ -159,26 +173,26 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
}
case API_Select::insert_or_assign: {
table->insert_or_assign(key_num_per_op_warmup, d_keys, d_vectors,
d_scores, stream);
d_scores, stream, global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
break;
}
case API_Select::find_or_insert: {
table->find_or_insert(key_num_per_op_warmup, d_keys, d_vectors,
d_scores, stream);
d_scores, stream, global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
break;
}
case API_Select::assign: {
table->assign(key_num_per_op_warmup, d_keys, d_def_val, d_scores,
stream);
stream, global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
break;
}
case API_Select::insert_and_evict: {
table->insert_and_evict(key_num_per_op_warmup, d_keys, d_vectors,
d_scores, d_evict_keys, d_def_val,
d_evict_scores, stream);
d_evict_scores, stream, global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
break;
}
Expand Down Expand Up @@ -208,7 +222,7 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
key_num_per_op_warmup, stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
table->find_or_insert(key_num_per_op_warmup, d_keys, d_vectors_ptr,
d_found, d_scores, stream);
d_found, d_scores, stream, global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_CHECK(cudaFree(d_vectors_ptr));
CUDA_CHECK(cudaFree(d_found));
Expand Down Expand Up @@ -246,7 +260,10 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
Hit_Mode::last_insert, start, true /*reset*/);
CUDA_CHECK(cudaMemcpy(d_keys, h_keys, key_num_per_op * sizeof(K),
cudaMemcpyHostToDevice));
// Upload this batch's scores. The byte count uses the score type S, matching
// how d_scores_real is allocated; sizing it by the key type K would copy the
// wrong number of bytes whenever the two types differ in width.
CUDA_CHECK(cudaMemcpy(d_scores_real, h_scores, key_num_per_op * sizeof(S),
cudaMemcpyHostToDevice));
auto timer = benchmark::Timer<double>();
global_epoch++;
switch (api) {
case API_Select::find: {
timer.start();
Expand All @@ -258,30 +275,32 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
case API_Select::insert_or_assign: {
timer.start();
table->insert_or_assign(key_num_per_op, d_keys, d_vectors, d_scores,
stream);
stream, global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
timer.end();
break;
}
case API_Select::find_or_insert: {
timer.start();
table->find_or_insert(key_num_per_op, d_keys, d_vectors, d_scores,
stream);
table->find_or_insert(key_num_per_op, d_keys, d_vectors, d_scores, stream,
global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
timer.end();
break;
}
case API_Select::assign: {
timer.start();
table->assign(key_num_per_op, d_keys, d_def_val, d_scores, stream);
table->assign(key_num_per_op, d_keys, d_def_val, d_scores, stream,
global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
timer.end();
break;
}
case API_Select::insert_and_evict: {
timer.start();
table->insert_and_evict(key_num_per_op, d_keys, d_vectors, d_scores,
d_evict_keys, d_def_val, d_evict_scores, stream);
d_evict_keys, d_def_val, d_evict_scores, stream,
global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
timer.end();
break;
Expand Down Expand Up @@ -314,7 +333,7 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
CUDA_CHECK(cudaStreamSynchronize(stream));
timer.start();
table->find_or_insert(key_num_per_op, d_keys, d_vectors_ptr, d_found,
d_scores, stream);
d_scores, stream, global_epoch);
CUDA_CHECK(cudaStreamSynchronize(stream));
timer.end();
CUDA_CHECK(cudaFree(d_vectors_ptr));
Expand Down Expand Up @@ -369,6 +388,7 @@ float test_one_api(std::shared_ptr<Table>& table, const API_Select api,
CUDA_CHECK(cudaFreeHost(h_found));

CUDA_CHECK(cudaFree(d_keys));
CUDA_CHECK(cudaFree(d_scores_real));
CUDA_CHECK(cudaFree(d_vectors));
CUDA_CHECK(cudaFree(d_def_val));
CUDA_CHECK(cudaFree(d_vectors_ptr));
Expand Down

0 comments on commit 7719b3a

Please sign in to comment.