diff --git a/include/rocksdb/write_buffer_manager.h b/include/rocksdb/write_buffer_manager.h index 3c97638814ae..f2b93d8aa55e 100644 --- a/include/rocksdb/write_buffer_manager.h +++ b/include/rocksdb/write_buffer_manager.h @@ -56,17 +56,13 @@ class WriteBufferManager final { // when `flush_size` is triggered. By enabling this flag, the oldest mutable // memtable will be frozen instead. // - // - flush_deadline: Interval in seconds. Flush memtable immediately if its - // oldest key was written before `now - flush_deadline` and the total memory - // reaches `flush_size`. - // // - cache: if `cache` is provided, memtable memory will be charged as a // dummy entry This is useful to keep the memory sum of both memtable and // block cache under control. - explicit WriteBufferManager( - size_t flush_size, std::shared_ptr<Cache> cache = {}, - float stall_ratio = 0.0, bool flush_oldest_first = false, - uint64_t flush_deadline = std::numeric_limits<uint64_t>::max()); + explicit WriteBufferManager(size_t flush_size, + std::shared_ptr<Cache> cache = {}, + float stall_ratio = 0.0, + bool flush_oldest_first = false); // No copying allowed WriteBufferManager(const WriteBufferManager&) = delete; WriteBufferManager& operator=(const WriteBufferManager&) = delete; @@ -105,10 +101,6 @@ class WriteBufferManager final { flush_oldest_first_.store(v, std::memory_order_relaxed); } - void SetDeadline(uint64_t deadline) { - flush_deadline_.store(deadline, std::memory_order_relaxed); - } - // Below functions should be called by RocksDB internally. // This handle is the same as the one created by `DB::Open` or @@ -204,7 +196,6 @@ class WriteBufferManager final { // Only used when flush_size is non-zero. 
std::atomic<size_t> memory_active_; std::atomic<bool> flush_oldest_first_; - std::atomic<uint64_t> flush_deadline_; const bool allow_stall_; const float stall_ratio_; diff --git a/memtable/write_buffer_manager.cc b/memtable/write_buffer_manager.cc index dbf447e79a4f..0f6cad7f9927 100644 --- a/memtable/write_buffer_manager.cc +++ b/memtable/write_buffer_manager.cc @@ -26,7 +26,6 @@ WriteBufferManager::WriteBufferManager(size_t _flush_size, flush_size_(_flush_size), memory_active_(0), flush_oldest_first_(flush_oldest_first), - flush_deadline_(flush_deadline), allow_stall_(stall_ratio >= 1.0), stall_ratio_(stall_ratio), stall_active_(false), @@ -187,33 +186,29 @@ void WriteBufferManager::MaybeFlushLocked(DB* this_db) { uint64_t max_score = 0; uint64_t current_score = 0; - uint64_t deadline_interval = flush_deadline_.load(std::memory_order_relaxed); - uint64_t deadline_time = 0; - if (deadline_interval != std::numeric_limits<uint64_t>::max()) { - int64_t current; - auto s = SystemClock::Default()->GetCurrentTime(&current); - if (s.ok()) { - assert(current > 0); - if (static_cast<uint64_t>(current) > deadline_interval) { - deadline_time = static_cast<uint64_t>(current) - deadline_interval; - } - } - } for (auto& s : sentinels_) { + // TODO: move this calculation to a callback. uint64_t current_memory_bytes = std::numeric_limits<uint64_t>::max(); uint64_t oldest_time = std::numeric_limits<uint64_t>::max(); s->db->GetApproximateActiveMemTableStats(s->cf, &current_memory_bytes, &oldest_time); - if (oldest_time < deadline_time) { - candidate = s.get(); - candidate_size = current_memory_bytes; - break; - } else if (flush_oldest_first_.load(std::memory_order_relaxed)) { + if (flush_oldest_first_.load(std::memory_order_relaxed)) { // Convert oldest to highest score. current_score = std::numeric_limits<uint64_t>::max() - oldest_time; } else { current_score = current_memory_bytes; } + // A very mild penalty for too many L0 files. 
+ uint64_t level0; + if (s->db->GetProperty(kNumFilesAtLevelPrefix + "0", &level0).ok() && + level0 >= 4) { + // 4->2, 5->4, 6->8, 7->12, 8->18 + uint64_t factor = (level0 - 2) * (level0 - 2) / 2; + if (factor > 100) { + factor = 100; + } + current_score = current_score * (100 - factor) / factor; + } if (current_score > max_score) { candidate = s.get(); max_score = current_score;