Skip to content

Commit

Permalink
Flushes should always go to HIGH priority thread pool
Browse files Browse the repository at this point in the history
Summary:
This is not column-family related diff. It is in columnfamily branch because the change is significant and we want to push it with next major release (3.0).

It removes the leveldb notion of one thread pool and expands it to two thread pools by default (HIGH and LOW). Flush process is removed from compaction process and all flush threads are executed on HIGH thread pool, since we don't want long-running compactions to influence flush latency.

Test Plan: make check

Reviewers: dhruba, haobo, kailiu, sdong

CC: leveldb

Differential Revision: https://reviews.facebook.net/D15987
  • Loading branch information
igorcanadi committed Feb 6, 2014
1 parent f8d5443 commit 0b4ccf7
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 57 deletions.
7 changes: 6 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Rocksdb Change Log

## Unreleased
## Unreleased (will be released in 3.0)
* By default, max_background_flushes is 1 and flush process is
removed from background compaction process. Flush process is now always
executed in high priority thread pool.

## Unreleased (will be relased in 2.8)
* By default, checksums are verified on every read from database


Expand Down
68 changes: 16 additions & 52 deletions db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ Options SanitizeOptions(const std::string& dbname,
DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
DBOptions result = src;
ClipToRange(&result.max_open_files, 20, 1000000);
if (result.max_background_flushes == 0) {
result.max_background_flushes = 1;
}

if (result.info_log == nullptr) {
Status s = CreateLoggerFromOptions(dbname, result.db_log_dir, src.env,
Expand Down Expand Up @@ -1704,11 +1707,15 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
is_flush_pending = true;
}
}
if (is_flush_pending &&
(bg_flush_scheduled_ < options_.max_background_flushes)) {
if (is_flush_pending) {
// memtable flush needed
bg_flush_scheduled_++;
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
// max_background_compactions should not be 0, because that means
// flush will never get executed
assert(options_.max_background_flushes != 0);
if (bg_flush_scheduled_ < options_.max_background_flushes) {
bg_flush_scheduled_++;
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
}
}
bool is_compaction_needed = false;
for (auto cfd : *versions_->GetColumnFamilySet()) {
Expand All @@ -1718,12 +1725,10 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
}
}

// Schedule BGWorkCompaction if there's a compaction pending (or a memtable
// flush, but the HIGH pool is not enabled). Do it only if
// max_background_compactions hasn't been reached and, in case
// Schedule BGWorkCompaction if there's a compaction pending
// Do it only if max_background_compactions hasn't been reached and, in case
// bg_manual_only_ > 0, if it's a manual compaction.
if ((manual_compaction_ || is_compaction_needed ||
(is_flush_pending && (options_.max_background_flushes <= 0))) &&
if ((manual_compaction_ || is_compaction_needed) &&
bg_compaction_scheduled_ < options_.max_background_compactions &&
(!bg_manual_only_ || manual_compaction_)) {

Expand Down Expand Up @@ -1868,41 +1873,14 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
*madeProgress = false;
mutex_.AssertHeld();

unique_ptr<Compaction> c;
bool is_manual = (manual_compaction_ != nullptr) &&
(manual_compaction_->in_progress == false);
if (is_manual) {
// another thread cannot pick up the same work
manual_compaction_->in_progress = true;
}

// TODO: remove memtable flush from formal compaction
for (auto cfd : *versions_->GetColumnFamilySet()) {
while (cfd->imm()->IsFlushPending()) {
Log(options_.info_log,
"BackgroundCompaction doing FlushMemTableToOutputFile with column "
"family %d, compaction slots available %d",
cfd->GetID(),
options_.max_background_compactions - bg_compaction_scheduled_);
Status stat =
FlushMemTableToOutputFile(cfd, madeProgress, deletion_state);
if (!stat.ok()) {
if (is_manual) {
manual_compaction_->status = stat;
manual_compaction_->done = true;
manual_compaction_->in_progress = false;
manual_compaction_ = nullptr;
}
return stat;
}
}
}

unique_ptr<Compaction> c;
InternalKey manual_end_storage;
InternalKey* manual_end = &manual_end_storage;
if (is_manual) {
ManualCompaction* m = manual_compaction_;
assert(m->in_progress);
m->in_progress = true;
c.reset(m->cfd->CompactRange(m->input_level, m->output_level, m->begin,
m->end, &manual_end));
if (!c) {
Expand Down Expand Up @@ -2299,20 +2277,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
}

for (; input->Valid() && !shutting_down_.Acquire_Load(); ) {
// Prioritize immutable compaction work
// TODO: remove memtable flush from normal compaction work
if (cfd->imm()->imm_flush_needed.NoBarrier_Load() != nullptr) {
const uint64_t imm_start = env_->NowMicros();
LogFlush(options_.info_log);
mutex_.Lock();
if (cfd->imm()->IsFlushPending()) {
FlushMemTableToOutputFile(cfd, nullptr, deletion_state);
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary
}
mutex_.Unlock();
imm_micros += (env_->NowMicros() - imm_start);
}

Slice key = input->key();
Slice value = input->value();

Expand Down
14 changes: 11 additions & 3 deletions include/rocksdb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -526,21 +526,29 @@ struct DBOptions {
// regardless of this setting
uint64_t delete_obsolete_files_period_micros;

// Maximum number of concurrent background jobs, submitted to
// the default LOW priority thread pool
// Maximum number of concurrent background compaction jobs, submitted to
// the default LOW priority thread pool.
// If you're increasing this, also consider increasing number of threads in
// LOW priority thread pool. For more information, see
// Env::SetBackgroundThreads
// Default: 1
int max_background_compactions;

// Maximum number of concurrent background memtable flush jobs, submitted to
// the HIGH priority thread pool.
//
// By default, all background jobs (major compaction and memtable flush) go
// to the LOW priority pool. If this option is set to a positive number,
// memtable flush jobs will be submitted to the HIGH priority pool.
// It is important when the same Env is shared by multiple db instances.
// Without a separate pool, long running major compaction jobs could
// potentially block memtable flush jobs of other db instances, leading to
// unnecessary Put stalls.
// Default: 0
//
// If you're increasing this, also consider increasing number of threads in
// HIGH priority thread pool. For more information, see
// Env::SetBackgroundThreads
// Default: 1
int max_background_flushes;

// Specify the maximal size of the info log file. If the log file
Expand Down
2 changes: 1 addition & 1 deletion util/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ DBOptions::DBOptions()
wal_dir(""),
delete_obsolete_files_period_micros(6 * 60 * 60 * 1000000UL),
max_background_compactions(1),
max_background_flushes(0),
max_background_flushes(1),
max_log_file_size(0),
log_file_time_to_roll(0),
keep_log_file_num(1000),
Expand Down

0 comments on commit 0b4ccf7

Please sign in to comment.