Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: change how we track memory_budget during evictions #3457

Merged
merged 2 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ jobs:
df -h
echo "-----------------------------"
ninja src/all

- name: PostFail
if: failure()
run: |
Expand Down Expand Up @@ -155,7 +156,7 @@ jobs:
timeout 5m ./multi_test --multi_exec_mode=1
timeout 5m ./multi_test --multi_exec_mode=3
timeout 5m ./json_family_test --jsonpathv2=false

timeout 5m ./tiered_storage_test --vmodule=db_slice=2 --logtostderr
- name: Upload unit logs on failure
if: failure()
uses: actions/upload-artifact@v4
Expand Down
85 changes: 47 additions & 38 deletions src/server/db_slice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,12 @@ class PrimeEvictionPolicy {
static constexpr bool can_evict = true; // we implement eviction functionality.
static constexpr bool can_gc = true;

PrimeEvictionPolicy(const DbContext& cntx, bool can_evict, ssize_t mem_budget, ssize_t soft_limit,
// mem_offset - memory_offset that we should account for in addition to DbSlice::memory_budget.
// May be negative.
PrimeEvictionPolicy(const DbContext& cntx, bool can_evict, ssize_t mem_offset, ssize_t soft_limit,
DbSlice* db_slice, bool apply_memory_limit)
: db_slice_(db_slice),
mem_budget_(mem_budget),
mem_offset_(mem_offset),
soft_limit_(soft_limit),
cntx_(cntx),
can_evict_(can_evict),
Expand All @@ -85,7 +87,6 @@ class PrimeEvictionPolicy {

// A hook function that is called every time a segment is full and requires splitting.
void RecordSplit(PrimeTable::Segment_t* segment) {
mem_budget_ -= PrimeTable::kSegBytes;
DVLOG(2) << "split: " << segment->SlowSize() << "/" << segment->capacity();
}

Expand All @@ -94,10 +95,6 @@ class PrimeEvictionPolicy {
unsigned GarbageCollect(const PrimeTable::HotspotBuckets& eb, PrimeTable* me);
unsigned Evict(const PrimeTable::HotspotBuckets& eb, PrimeTable* me);

ssize_t mem_budget() const {
return mem_budget_;
}

unsigned evicted() const {
return evicted_;
}
Expand All @@ -108,7 +105,7 @@ class PrimeEvictionPolicy {

private:
DbSlice* db_slice_;
ssize_t mem_budget_;
ssize_t mem_offset_;
ssize_t soft_limit_ = 0;
const DbContext cntx_;

Expand Down Expand Up @@ -136,7 +133,8 @@ class PrimeBumpPolicy {
};

bool PrimeEvictionPolicy::CanGrow(const PrimeTable& tbl) const {
if (!apply_memory_limit_ || mem_budget_ > soft_limit_)
ssize_t mem_available = db_slice_->memory_budget() + mem_offset_;
if (!apply_memory_limit_ || mem_available > soft_limit_)
return true;

DCHECK_LE(tbl.size(), tbl.capacity());
Expand All @@ -145,11 +143,11 @@ bool PrimeEvictionPolicy::CanGrow(const PrimeTable& tbl) const {
// we estimate how much memory we will take with the current capacity
// even though we may currently use less memory.
// see https://github.com/dragonflydb/dragonfly/issues/256#issuecomment-1227095503
size_t new_available = (tbl.capacity() - tbl.size()) + PrimeTable::kSegCapacity;
bool res =
mem_budget_ > int64_t(PrimeTable::kSegBytes + db_slice_->bytes_per_object() * new_available *
GetFlag(FLAGS_table_growth_margin));
VLOG(2) << "available: " << new_available << ", res: " << res;
size_t table_free_items = (tbl.capacity() - tbl.size()) + PrimeTable::kSegCapacity;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's just refactoring of the expression, by separating it into variables

size_t obj_bytes_estimation =
db_slice_->bytes_per_object() * table_free_items * GetFlag(FLAGS_table_growth_margin);
bool res = mem_available > int64_t(PrimeTable::kSegBytes + obj_bytes_estimation);
VLOG(2) << "available: " << table_free_items << ", res: " << res;

return res;
}
Expand Down Expand Up @@ -575,12 +573,21 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
// It's a new entry.
CallChangeCallbacks(cntx.db_index, key, {key});

ssize_t memory_offset = -key.size();

// If we are low on memory due to cold storage, free some memory.
if (memory_budget_ < ssize_t(key.size()) && owner_->tiered_storage()) {
if (owner_->tiered_storage()) {
// At least 40KB bytes to cover potential segment split.
size_t goal = std::max<size_t>(key.size() * 2 - memory_budget_, 40_KB);
size_t reclaimed = owner_->tiered_storage()->ReclaimMemory(goal);
memory_budget_ += reclaimed;
ssize_t red_line = std::max<size_t>(key.size() * 2, 40_KB);
if (memory_budget_ < red_line) {
size_t goal = red_line - memory_budget_;
size_t reclaimed = owner_->tiered_storage()->ReclaimMemory(goal);
memory_budget_ += reclaimed;
}

// CoolMemoryUsage is the memory that we can always reclaim, like in the block above,
// therefore we include it for PrimeEvictionPolicy considerations.
memory_offset += owner_->tiered_storage()->CoolMemoryUsage();
}

// In case we are loading from rdb file or replicating we want to disable conservative memory
Expand All @@ -594,37 +601,36 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
bool apply_memory_limit =
!owner_->IsReplica() && !(ServerState::tlocal()->gstate() == GlobalState::LOADING);

PrimeEvictionPolicy evp{cntx,
(bool(caching_mode_) && !owner_->IsReplica()),
int64_t(memory_budget_ - key.size()),
ssize_t(soft_budget_limit_),
this,
apply_memory_limit};

// If we are over limit in non-cache scenario, just be conservative and throw.
if (apply_memory_limit && !caching_mode_ && evp.mem_budget() < 0) {
VLOG(2) << "AddOrFind: over limit, budget: " << evp.mem_budget();
if (apply_memory_limit && !caching_mode_ && memory_budget_ + memory_offset < 0) {
VLOG(2) << "AddOrFind: over limit, budget: " << memory_budget_;
events_.insertion_rejections++;
return OpStatus::OUT_OF_MEMORY;
}

PrimeEvictionPolicy evp{cntx, (bool(caching_mode_) && !owner_->IsReplica()),
memory_offset, ssize_t(soft_budget_limit_),
this, apply_memory_limit};

// Fast-path if change_cb_ is empty so we Find or Add using
// the insert operation: twice more efficient.
CompactObj co_key{key};
PrimeIterator it;

size_t table_before = db.table_memory();
ssize_t table_before = db.prime.mem_usage();

try {
it = db.prime.InsertNew(std::move(co_key), PrimeValue{}, evp);
} catch (bad_alloc& e) {
VLOG(2) << "AddOrFind2: bad alloc exception, budget: " << evp.mem_budget();
VLOG(2) << "AddOrFind: bad alloc exception, budget: " << memory_budget_ + memory_offset;
events_.insertion_rejections++;
return OpStatus::OUT_OF_MEMORY;
}

size_t evicted_obj_bytes = 0;
if (evp.mem_budget() < 0 && apply_memory_limit) {
ssize_t table_increase = db.prime.mem_usage() - table_before;
memory_budget_ -= table_increase;

if (memory_budget_ < 0 && apply_memory_limit) {
// We may reach the state when our memory usage is below the limit even if we
// do not add new segments. For example, we have half full segments
// and we add new objects or update the existing ones and our memory usage grows.
Expand All @@ -636,12 +642,11 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
#if 0
size_t evict_goal = std::max<size_t>(512, (-evp.mem_budget()) / 32);
auto [items, bytes] = FreeMemWithEvictionStep(cntx.db_index, it.segment_id(), evict_goal);
evicted_obj_bytes = bytes;
events_.hard_evictions += items;
#endif
}

table_memory_ += (db.table_memory() - table_before);
table_memory_ += table_increase;
entries_count_++;

db.stats.inline_keys += it->first.IsInline();
Expand All @@ -654,8 +659,6 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
events_.stash_unloaded = db.prime.stash_unloaded();
events_.evicted_keys += evp.evicted();
events_.garbage_checked += evp.checked();

memory_budget_ = evp.mem_budget() + evicted_obj_bytes;
if (cluster::IsClusterEnabled()) {
cluster::SlotId sid = cluster::KeySlot(key);
db.slots_stats[sid].key_count += 1;
Expand Down Expand Up @@ -1470,9 +1473,9 @@ void DbSlice::PerformDeletion(Iterator del_it, ExpIterator exp_it, DbTable* tabl
shard_owner()->tiered_storage()->Delete(table->index, &del_it->second);
}

size_t value_heap_size = pv.MallocUsed();
ssize_t value_heap_size = pv.MallocUsed(), key_size_used = del_it->first.MallocUsed();
stats.inline_keys -= del_it->first.IsInline();
AccountObjectMemory(del_it.key(), del_it->first.ObjType(), -del_it->first.MallocUsed(),
AccountObjectMemory(del_it.key(), del_it->first.ObjType(), -key_size_used,
table); // Key
AccountObjectMemory(del_it.key(), pv.ObjType(), -value_heap_size, table); // Value
if (pv.ObjType() == OBJ_HASH && pv.Encoding() == kEncodingListPack) {
Expand All @@ -1487,8 +1490,14 @@ void DbSlice::PerformDeletion(Iterator del_it, ExpIterator exp_it, DbTable* tabl
}

table->prime.Erase(del_it.GetInnerIt());
table_memory_ += (table->table_memory() - table_before);

// Note, currently we do not shrink our tables upon deletion.
// This DCHECK ensures that if we decide to do so, we will have to update table_memory_
// accordingly.
DCHECK_EQ(table->table_memory(), table_before);

--entries_count_;
memory_budget_ += (value_heap_size + key_size_used);

SendInvalidationTrackingMessage(del_it.key());
}
Expand Down
2 changes: 1 addition & 1 deletion src/server/db_slice.h
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ class DbSlice {
bool expire_allowed_ = true;

uint64_t version_ = 1; // Used to version entries in the PrimeTable.
ssize_t memory_budget_ = SSIZE_MAX;
ssize_t memory_budget_ = SSIZE_MAX / 2;
size_t bytes_per_object_ = 0;
size_t soft_budget_limit_ = 0;
size_t table_memory_ = 0;
Expand Down
3 changes: 1 addition & 2 deletions src/server/tiered_storage_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,7 @@ TEST_F(TieredStorageTest, MemoryPressure) {
resp = Run({"INFO", "ALL"});
ASSERT_FALSE(true) << i << "\nInfo ALL:\n" << resp.GetString();
}
// TODO: to remove it once used_mem_current is updated frequently.
ThisFiber::SleepFor(300us);
ThisFiber::SleepFor(100us);
}

EXPECT_LT(used_mem_peak.load(), 20_MB);
Expand Down
Loading