From 13ef21c22e7576a6538cf6c37ab88eece2c5d016 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 28 Feb 2024 14:36:13 -0800 Subject: [PATCH] default_write_temperature option (#12388) Summary: Currently, SST files to which neither last_level_temperature nor file_temperature_age_thresholds applies are written with temperature kUnknown, which is a little weird and doesn't support CF-based tiering. The default_temperature option only affects how kUnknown is interpreted for stats. This change adds a new per-CF option, default_write_temperature, that determines the temperature of new SST files when those other options do not apply. Also made a change to ignore last_level_temperature with FIFO compaction, because I found that it could lead to an infinite loop in compaction. Needed follow-up: fix temperature handling with external file ingestion. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12388 Test Plan: unit tests extended appropriately. (Ignore whitespace changes when reviewing.) 
Reviewed By: jowlyzhang Differential Revision: D54266574 Pulled By: pdillinger fbshipit-source-id: c9ec9a74dbf22be6e986f77f9689d05fea8ef0bb --- db/column_family.cc | 7 + db/compaction/compaction_job.cc | 11 +- db/compaction/compaction_picker.cc | 11 +- db/compaction/compaction_picker_fifo.cc | 10 +- db/compaction/compaction_picker_level.cc | 2 +- db/compaction/compaction_picker_universal.cc | 8 +- db/db_compaction_test.cc | 140 ++++++++----- db/db_impl/db_impl_compaction_flush.cc | 3 +- db/db_test2.cc | 197 +++++++++++-------- db/external_sst_file_ingestion_job.cc | 5 +- db/flush_job.cc | 2 + db/flush_job.h | 2 +- file/random_access_file_reader.cc | 1 - include/rocksdb/advanced_options.h | 12 +- options/cf_options.cc | 4 + options/cf_options.h | 3 + options/options.cc | 2 + options/options_helper.cc | 1 + options/options_settable_test.cc | 1 + options/options_test.cc | 4 + 20 files changed, 273 insertions(+), 153 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 94830ce7d97..0923ff44842 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -411,6 +411,13 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, "periodic_compaction_seconds does not support FIFO compaction. You" "may want to set option TTL instead."); } + if (result.last_level_temperature != Temperature::kUnknown) { + ROCKS_LOG_WARN( + db_options.info_log.get(), + "last_level_temperature is ignored with FIFO compaction. 
Consider " + "CompactionOptionsFIFO::file_temperature_age_thresholds."); + result.last_level_temperature = Temperature::kUnknown; + } } // For universal compaction, `ttl` and `periodic_compaction_seconds` mean the diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 931716ff502..ff2cc9f957e 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1850,13 +1850,14 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, // Pass temperature of the last level files to FileSystem. FileOptions fo_copy = file_options_; Temperature temperature = sub_compact->compaction->output_temperature(); - // only set for the last level compaction and also it's not output to - // penultimate level (when preclude_last_level feature is enabled) - if (temperature == Temperature::kUnknown && + Temperature last_level_temp = + sub_compact->compaction->mutable_cf_options()->last_level_temperature; + // Here last_level_temperature supersedes default_write_temperature, when + // enabled and applicable + if (last_level_temp != Temperature::kUnknown && sub_compact->compaction->is_last_level() && !sub_compact->IsCurrentPenultimateLevel()) { - temperature = - sub_compact->compaction->mutable_cf_options()->last_level_temperature; + temperature = last_level_temp; } fo_copy.temperature = temperature; diff --git a/db/compaction/compaction_picker.cc b/db/compaction/compaction_picker.cc index 67e8ac0633f..74985b46cc4 100644 --- a/db/compaction/compaction_picker.cc +++ b/db/compaction/compaction_picker.cc @@ -377,7 +377,8 @@ Compaction* CompactionPicker::CompactFiles( output_level, compact_options.output_file_size_limit, mutable_cf_options.max_compaction_bytes, output_path_id, compression_type, GetCompressionOptions(mutable_cf_options, vstorage, output_level), - Temperature::kUnknown, compact_options.max_subcompactions, + mutable_cf_options.default_write_temperature, + compact_options.max_subcompactions, /* 
grandparents */ {}, true); RegisterCompaction(c); return c; @@ -664,7 +665,8 @@ Compaction* CompactionPicker::CompactRange( compact_range_options.target_path_id, GetCompressionType(vstorage, mutable_cf_options, output_level, 1), GetCompressionOptions(mutable_cf_options, vstorage, output_level), - Temperature::kUnknown, compact_range_options.max_subcompactions, + mutable_cf_options.default_write_temperature, + compact_range_options.max_subcompactions, /* grandparents */ {}, /* is manual */ true, trim_ts, /* score */ -1, /* deletion_compaction */ false, /* l0_files_might_overlap */ true, CompactionReason::kUnknown, @@ -852,8 +854,9 @@ Compaction* CompactionPicker::CompactRange( GetCompressionType(vstorage, mutable_cf_options, output_level, vstorage->base_level()), GetCompressionOptions(mutable_cf_options, vstorage, output_level), - Temperature::kUnknown, compact_range_options.max_subcompactions, - std::move(grandparents), /* is manual */ true, trim_ts, /* score */ -1, + mutable_cf_options.default_write_temperature, + compact_range_options.max_subcompactions, std::move(grandparents), + /* is manual */ true, trim_ts, /* score */ -1, /* deletion_compaction */ false, /* l0_files_might_overlap */ true, CompactionReason::kUnknown, compact_range_options.blob_garbage_collection_policy, diff --git a/db/compaction/compaction_picker_fifo.cc b/db/compaction/compaction_picker_fifo.cc index 50529777028..d898b5126de 100644 --- a/db/compaction/compaction_picker_fifo.cc +++ b/db/compaction/compaction_picker_fifo.cc @@ -116,7 +116,8 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( Compaction* c = new Compaction( vstorage, ioptions_, mutable_cf_options, mutable_db_options, std::move(inputs), 0, 0, 0, 0, kNoCompression, - mutable_cf_options.compression_opts, Temperature::kUnknown, + mutable_cf_options.compression_opts, + mutable_cf_options.default_write_temperature, /* max_subcompactions */ 0, {}, /* is manual */ false, /* trim_ts */ "", vstorage->CompactionScore(0), /* is 
deletion compaction */ true, /* l0_files_might_overlap */ true, @@ -185,7 +186,8 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction( {comp_inputs}, 0, 16 * 1024 * 1024 /* output file size limit */, 0 /* max compaction bytes, not applicable */, 0 /* output path ID */, mutable_cf_options.compression, - mutable_cf_options.compression_opts, Temperature::kUnknown, + mutable_cf_options.compression_opts, + mutable_cf_options.default_write_temperature, 0 /* max_subcompactions */, {}, /* is manual */ false, /* trim_ts */ "", vstorage->CompactionScore(0), /* is deletion compaction */ false, @@ -280,7 +282,8 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction( /* target_file_size */ 0, /* max_compaction_bytes */ 0, /* output_path_id */ 0, kNoCompression, - mutable_cf_options.compression_opts, Temperature::kUnknown, + mutable_cf_options.compression_opts, + mutable_cf_options.default_write_temperature, /* max_subcompactions */ 0, {}, /* is manual */ false, /* trim_ts */ "", vstorage->CompactionScore(0), /* is deletion compaction */ true, @@ -414,6 +417,7 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction( if (inputs[0].files.empty()) { return nullptr; } + assert(compaction_target_temp != Temperature::kLastTemperature); Compaction* c = new Compaction( vstorage, ioptions_, mutable_cf_options, mutable_db_options, diff --git a/db/compaction/compaction_picker_level.cc b/db/compaction/compaction_picker_level.cc index 328baa988c0..67f19168761 100644 --- a/db/compaction/compaction_picker_level.cc +++ b/db/compaction/compaction_picker_level.cc @@ -550,7 +550,7 @@ Compaction* LevelCompactionBuilder::GetCompaction() { GetCompressionType(vstorage_, mutable_cf_options_, output_level_, vstorage_->base_level()), GetCompressionOptions(mutable_cf_options_, vstorage_, output_level_), - Temperature::kUnknown, + mutable_cf_options_.default_write_temperature, /* max_subcompactions */ 0, std::move(grandparents_), is_manual_, /* trim_ts */ "", start_level_score_, false /* 
deletion_compaction */, l0_files_might_overlap, compaction_reason_); diff --git a/db/compaction/compaction_picker_universal.cc b/db/compaction/compaction_picker_universal.cc index ae0ec4d17c9..597edb7919a 100644 --- a/db/compaction/compaction_picker_universal.cc +++ b/db/compaction/compaction_picker_universal.cc @@ -870,7 +870,7 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns( output_level, 1, enable_compression), GetCompressionOptions(mutable_cf_options_, vstorage_, output_level, enable_compression), - Temperature::kUnknown, + mutable_cf_options_.default_write_temperature, /* max_subcompactions */ 0, grandparents, /* is manual */ false, /* trim_ts */ "", score_, false /* deletion_compaction */, @@ -1204,7 +1204,7 @@ Compaction* UniversalCompactionBuilder::PickIncrementalForReduceSizeAmp( true /* enable_compression */), GetCompressionOptions(mutable_cf_options_, vstorage_, output_level, true /* enable_compression */), - Temperature::kUnknown, + mutable_cf_options_.default_write_temperature, /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, /* trim_ts */ "", score_, false /* deletion_compaction */, /* l0_files_might_overlap */ true, @@ -1347,7 +1347,7 @@ Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() { /* max_grandparent_overlap_bytes */ GetMaxOverlappingBytes(), path_id, GetCompressionType(vstorage_, mutable_cf_options_, output_level, 1), GetCompressionOptions(mutable_cf_options_, vstorage_, output_level), - Temperature::kUnknown, + mutable_cf_options_.default_write_temperature, /* max_subcompactions */ 0, grandparents, /* is manual */ false, /* trim_ts */ "", score_, false /* deletion_compaction */, /* l0_files_might_overlap */ true, @@ -1440,7 +1440,7 @@ Compaction* UniversalCompactionBuilder::PickCompactionWithSortedRunRange( true /* enable_compression */), GetCompressionOptions(mutable_cf_options_, vstorage_, output_level, true /* enable_compression */), - Temperature::kUnknown, + 
mutable_cf_options_.default_write_temperature, /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, /* trim_ts */ "", score_, false /* deletion_compaction */, /* l0_files_might_overlap */ true, compaction_reason); diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index dc85951c25a..e0ecff67765 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -9164,66 +9164,104 @@ TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest2) { } TEST_F(DBCompactionTest, FIFOChangeTemperature) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleFIFO; - options.num_levels = 1; - options.max_open_files = -1; - options.level0_file_num_compaction_trigger = 2; - options.create_if_missing = true; - CompactionOptionsFIFO fifo_options; - fifo_options.file_temperature_age_thresholds = {{Temperature::kCold, 1000}}; - fifo_options.max_table_files_size = 100000000; - options.compaction_options_fifo = fifo_options; - env_->SetMockSleep(); - Reopen(options); + for (bool write_time_default : {false, true}) { + SCOPED_TRACE("write time default? 
" + std::to_string(write_time_default)); - int total_cold = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "NewWritableFile::FileOptions.temperature", [&](void* arg) { - Temperature temperature = *(static_cast(arg)); - if (temperature == Temperature::kCold) { - total_cold++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleFIFO; + options.num_levels = 1; + options.max_open_files = -1; + options.level0_file_num_compaction_trigger = 2; + options.create_if_missing = true; + CompactionOptionsFIFO fifo_options; + fifo_options.file_temperature_age_thresholds = {{Temperature::kCold, 1000}}; + fifo_options.max_table_files_size = 100000000; + options.compaction_options_fifo = fifo_options; + env_->SetMockSleep(); + if (write_time_default) { + options.default_write_temperature = Temperature::kWarm; + } + // Should be ignored (TODO: fail?) + options.last_level_temperature = Temperature::kHot; + Reopen(options); - // The file system does not support checksum handoff. The check - // will be ignored. 
- ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); + int total_cold = 0; + int total_warm = 0; + int total_hot = 0; + int total_unknown = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "NewWritableFile::FileOptions.temperature", [&](void* arg) { + Temperature temperature = *(static_cast(arg)); + if (temperature == Temperature::kCold) { + total_cold++; + } else if (temperature == Temperature::kWarm) { + total_warm++; + } else if (temperature == Temperature::kHot) { + total_hot++; + } else { + assert(temperature == Temperature::kUnknown); + total_unknown++; + } + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); + // The file system does not support checksum handoff. The check + // will be ignored. + ASSERT_OK(Put(Key(0), "value1")); + env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_OK(Put(Key(0), "value1")); + env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); + ASSERT_OK(Put(Key(0), "value1")); + env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); + if (write_time_default) { + // Also test dynamic option change + ASSERT_OK(db_->SetOptions({{"default_write_temperature", "kHot"}})); + } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ASSERT_OK(Put(Key(0), "value1")); + 
env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); - ColumnFamilyMetaData metadata; - db_->GetColumnFamilyMetaData(&metadata); - ASSERT_EQ(4, metadata.file_count); - ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); - ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[1].temperature); - ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[2].temperature); - ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[3].temperature); - ASSERT_EQ(2, total_cold); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); - Destroy(options); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + + ColumnFamilyMetaData metadata; + db_->GetColumnFamilyMetaData(&metadata); + ASSERT_EQ(4, metadata.file_count); + if (write_time_default) { + ASSERT_EQ(Temperature::kHot, metadata.levels[0].files[0].temperature); + ASSERT_EQ(Temperature::kWarm, metadata.levels[0].files[1].temperature); + // Includes obsolete/deleted files moved to cold + ASSERT_EQ(total_warm, 3); + ASSERT_EQ(total_hot, 1); + // Includes non-SST DB files + ASSERT_GT(total_unknown, 0); + } else { + ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); + ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[1].temperature); + ASSERT_EQ(total_warm, 0); + ASSERT_EQ(total_hot, 0); + // Includes non-SST DB files + ASSERT_GT(total_unknown, 4); + } + ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[2].temperature); + ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[3].temperature); + ASSERT_EQ(2, total_cold); + + Destroy(options); + } } TEST_F(DBCompactionTest, DisableMultiManualCompaction) { diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index fd73dd4e426..63f280b99db 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1868,7 +1868,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { , 
LLONG_MAX /* max compaction bytes, not applicable */, 0 /* output path ID, not applicable */, mutable_cf_options.compression, - mutable_cf_options.compression_opts, Temperature::kUnknown, + mutable_cf_options.compression_opts, + mutable_cf_options.default_write_temperature, 0 /* max_subcompactions, not applicable */, {} /* grandparents, not applicable */, false /* is manual */, "" /* trim_ts */, -1 /* score, not applicable */, diff --git a/db/db_test2.cc b/db/db_test2.cc index a515dcd0e76..8d7f37f6d37 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -6904,88 +6904,127 @@ TEST_F(DBTest2, LastLevelTemperatureUniversal) { } TEST_F(DBTest2, LastLevelStatistics) { - Options options = CurrentOptions(); - options.last_level_temperature = Temperature::kWarm; - options.default_temperature = Temperature::kHot; - options.level0_file_num_compaction_trigger = 2; - options.level_compaction_dynamic_level_bytes = true; - options.statistics = CreateDBStatistics(); - Reopen(options); - - // generate 1 sst on level 0 - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("bar", "bar")); - ASSERT_OK(Flush()); - ASSERT_EQ("bar", Get("bar")); - - ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), 0); - ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), 0); - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), - options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), - options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), 0); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), 0); - - // 2nd flush to trigger compaction - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("bar", "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("bar", Get("bar")); - - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), - 
options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), - options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), - options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), - options.statistics->getTickerCount(WARM_FILE_READ_COUNT)); - - auto pre_bytes = - options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES); - auto pre_count = - options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT); - - // 3rd flush to generate 1 sst on level 0 - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("bar", "bar")); - ASSERT_OK(Flush()); - ASSERT_EQ("bar", Get("bar")); + for (bool write_time_default : {false, true}) { + SCOPED_TRACE("write time default? " + std::to_string(write_time_default)); + Options options = CurrentOptions(); + options.last_level_temperature = Temperature::kWarm; + if (write_time_default) { + options.default_write_temperature = Temperature::kHot; + ASSERT_EQ(options.default_temperature, Temperature::kUnknown); + } else { + options.default_temperature = Temperature::kHot; + ASSERT_EQ(options.default_write_temperature, Temperature::kUnknown); + } + options.level0_file_num_compaction_trigger = 2; + options.level_compaction_dynamic_level_bytes = true; + options.statistics = CreateDBStatistics(); + BlockBasedTableOptions bbto; + bbto.no_block_cache = true; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), - pre_bytes); - ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), - pre_count); - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), - options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), - 
options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), - options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), - options.statistics->getTickerCount(WARM_FILE_READ_COUNT)); - - // Not a realistic setting to make last level kWarm and default temp kCold. - // This is just for testing default temp can be reset on reopen while the - // last level temp is consistent across DB reopen because those file's temp - // are persisted in manifest. - options.default_temperature = Temperature::kCold; - ASSERT_OK(options.statistics->Reset()); - Reopen(options); - ASSERT_EQ("bar", Get("bar")); + DestroyAndReopen(options); - ASSERT_EQ(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + // generate 1 sst on level 0 + ASSERT_OK(Put("foo1", "bar")); + ASSERT_OK(Put("bar", "bar")); + ASSERT_OK(Flush()); + ASSERT_EQ("bar", Get("bar")); + + ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), 0); + ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), 0); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), 0); + + // 2nd flush to trigger compaction + ASSERT_OK(Put("foo2", "bar")); + ASSERT_OK(Put("bar", "bar")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ("bar", Get("bar")); + + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + 
options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(WARM_FILE_READ_COUNT)); + + auto pre_bytes = + options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES); + auto pre_count = + options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT); + + // 3rd flush to generate 1 sst on level 0 + ASSERT_OK(Put("foo3", "bar")); + ASSERT_OK(Put("bar", "bar")); + ASSERT_OK(Flush()); + ASSERT_EQ("bar", Get("foo1")); + ASSERT_EQ("bar", Get("foo2")); + ASSERT_EQ("bar", Get("foo3")); + ASSERT_EQ("bar", Get("bar")); + + ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + pre_bytes); + ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + pre_count); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(WARM_FILE_READ_COUNT)); + // Control + ASSERT_NE(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT)); + + // Not a realistic setting to make last level kWarm and default temp kCold. + // This is just for testing default temp can be reset on reopen while the + // last level temp is consistent across DB reopen because those file's temp + // are persisted in manifest. 
+ options.default_temperature = Temperature::kCold; + ASSERT_OK(options.statistics->Reset()); + Reopen(options); + ASSERT_EQ("bar", Get("foo1")); + ASSERT_EQ("bar", Get("foo2")); + ASSERT_EQ("bar", Get("foo3")); + ASSERT_EQ("bar", Get("bar")); + + if (write_time_default) { + // Unchanged + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); + + ASSERT_LT(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(0, options.statistics->getTickerCount(COLD_FILE_READ_BYTES)); + } else { + // Changed (in how we map kUnknown) + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(COLD_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(COLD_FILE_READ_COUNT)); + + ASSERT_EQ(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_LT(0, options.statistics->getTickerCount(COLD_FILE_READ_BYTES)); + } - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), - options.statistics->getTickerCount(COLD_FILE_READ_BYTES)); - ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), - options.statistics->getTickerCount(COLD_FILE_READ_COUNT)); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), - options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); - ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), - options.statistics->getTickerCount(WARM_FILE_READ_COUNT)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(WARM_FILE_READ_COUNT)); + // Control + 
ASSERT_NE(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT)); + } } TEST_F(DBTest2, CheckpointFileTemperature) { diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 9f505de2126..c8c777c58ae 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -160,6 +160,8 @@ Status ExternalSstFileIngestionJob::Prepare( TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Prepare:CopyFile", nullptr); // CopyFile also sync the new file. + // FIXME: use sv->mutable_cf_options.default_write_temperature and + // sort out exact temperature handling status = CopyFile(fs_.get(), path_outside_db, path_inside_db, 0, db_options_.use_fsync, io_tracer_, Temperature::kUnknown); @@ -515,7 +517,8 @@ void ExternalSstFileIngestionJob::CreateEquivalentFileIngestingCompactions() { , LLONG_MAX /* max compaction bytes, not applicable */, 0 /* output path ID, not applicable */, mutable_cf_options.compression, - mutable_cf_options.compression_opts, Temperature::kUnknown, + mutable_cf_options.compression_opts, + mutable_cf_options.default_write_temperature, 0 /* max_subcompaction, not applicable */, {} /* grandparents, not applicable */, false /* is manual */, "" /* trim_ts */, -1 /* score, not applicable */, diff --git a/db/flush_job.cc b/db/flush_job.cc index 393d710089e..085f368d490 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -856,6 +856,8 @@ Status FlushJob::WriteLevel0Table() { seqno_to_time_mapping_.CopyFromSeqnoRange(db_impl_seqno_to_time_mapping_, smallest_seqno); } + meta_.temperature = mutable_cf_options_.default_write_temperature; + file_options_.temperature = meta_.temperature; std::vector blob_file_additions; diff --git a/db/flush_job.h b/db/flush_job.h index d348176c1df..0667a09db37 100644 --- a/db/flush_job.h +++ b/db/flush_job.h @@ -157,7 +157,7 @@ class FlushJob { // this job. 
All memtables in this column family with an ID smaller than or // equal to max_memtable_id_ will be selected for flush. uint64_t max_memtable_id_; - const FileOptions file_options_; + FileOptions file_options_; VersionSet* versions_; InstrumentedMutex* db_mutex_; std::atomic* shutting_down_; diff --git a/file/random_access_file_reader.cc b/file/random_access_file_reader.cc index 688ef35660c..e2f757f5bf8 100644 --- a/file/random_access_file_reader.cc +++ b/file/random_access_file_reader.cc @@ -462,7 +462,6 @@ IOStatus RandomAccessFileReader::MultiRead(const IOOptions& opts, file_name(), read_reqs[i].result.size(), read_reqs[i].offset); } - RecordIOStats(stats_, file_temperature_, is_last_level_, read_reqs[i].result.size()); } diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 58e9dc9d0fa..fdf2af058de 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -803,20 +803,28 @@ struct AdvancedColumnFamilyOptions { uint64_t sample_for_compression = 0; // EXPERIMENTAL - // The feature is still in development and is incomplete. // If this option is set, when creating the last level files, pass this // temperature to FileSystem used. Should be no-op for default FileSystem // and users need to plug in their own FileSystem to take advantage of it. + // When using FIFO compaction, this option is ignored. // // Dynamically changeable through the SetOptions() API Temperature last_level_temperature = Temperature::kUnknown; + // EXPERIMENTAL + // When no other option such as last_level_temperature determines the + // temperature of a new SST file, it will be written with this temperature, + // which can be set differently for each column family. 
+ // + // Dynamically changeable through the SetOptions() API + Temperature default_write_temperature = Temperature::kUnknown; + // EXPERIMENTAL // When this field is set, all SST files without an explicitly set temperature // will be treated as if they have this temperature for file reading // accounting purpose, such as io statistics, io perf context. // - // Not dynamically changeable, change it requires db restart. + // Not dynamically changeable; change requires DB restart. Temperature default_temperature = Temperature::kUnknown; // EXPERIMENTAL diff --git a/options/cf_options.cc b/options/cf_options.cc index b4b28ea8b5e..3f3af21a074 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -449,6 +449,10 @@ static std::unordered_map {offsetof(struct MutableCFOptions, last_level_temperature), OptionType::kTemperature, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"default_write_temperature", + {offsetof(struct MutableCFOptions, default_write_temperature), + OptionType::kTemperature, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, {"enable_blob_files", {offsetof(struct MutableCFOptions, enable_blob_files), OptionType::kBoolean, OptionVerificationType::kNormal, diff --git a/options/cf_options.h b/options/cf_options.h index c51164a19e4..2c3cd61aeff 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -164,6 +164,7 @@ struct MutableCFOptions { compression_opts(options.compression_opts), bottommost_compression_opts(options.bottommost_compression_opts), last_level_temperature(options.last_level_temperature), + default_write_temperature(options.default_write_temperature), memtable_protection_bytes_per_key( options.memtable_protection_bytes_per_key), block_protection_bytes_per_key(options.block_protection_bytes_per_key), @@ -218,6 +219,7 @@ struct MutableCFOptions { compression(Snappy_Supported() ? 
kSnappyCompression : kNoCompression), bottommost_compression(kDisableCompressionOption), last_level_temperature(Temperature::kUnknown), + default_write_temperature(Temperature::kUnknown), memtable_protection_bytes_per_key(0), block_protection_bytes_per_key(0), sample_for_compression(0), @@ -309,6 +311,7 @@ struct MutableCFOptions { CompressionOptions compression_opts; CompressionOptions bottommost_compression_opts; Temperature last_level_temperature; + Temperature default_write_temperature; uint32_t memtable_protection_bytes_per_key; uint8_t block_protection_bytes_per_key; diff --git a/options/options.cc b/options/options.cc index b39ab6445d6..514fdb6d012 100644 --- a/options/options.cc +++ b/options/options.cc @@ -93,6 +93,8 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options) ttl(options.ttl), periodic_compaction_seconds(options.periodic_compaction_seconds), sample_for_compression(options.sample_for_compression), + last_level_temperature(options.last_level_temperature), + default_write_temperature(options.default_write_temperature), default_temperature(options.default_temperature), preclude_last_level_data_seconds( options.preclude_last_level_data_seconds), diff --git a/options/options_helper.cc b/options/options_helper.cc index a0fd5c9e574..db8892102ac 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -272,6 +272,7 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions, cf_opts->sample_for_compression = moptions.sample_for_compression; cf_opts->compression_per_level = moptions.compression_per_level; cf_opts->last_level_temperature = moptions.last_level_temperature; + cf_opts->default_write_temperature = moptions.default_write_temperature; cf_opts->memtable_max_range_deletions = moptions.memtable_max_range_deletions; } diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 20c746ac31d..747b888af5e 100644 --- a/options/options_settable_test.cc +++ 
b/options/options_settable_test.cc @@ -553,6 +553,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "prepopulate_blob_cache=kDisable;" "bottommost_temperature=kWarm;" "last_level_temperature=kWarm;" + "default_write_temperature=kCold;" "default_temperature=kHot;" "preclude_last_level_data_seconds=86400;" "preserve_internal_time_seconds=86400;" diff --git a/options/options_test.cc b/options/options_test.cc index 93c76f5512d..7fa89aec974 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -131,6 +131,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"blob_file_starting_level", "1"}, {"prepopulate_blob_cache", "kDisable"}, {"last_level_temperature", "kWarm"}, + {"default_write_temperature", "kCold"}, {"default_temperature", "kHot"}, {"persist_user_defined_timestamps", "true"}, {"memtable_max_range_deletions", "0"}, @@ -287,6 +288,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.blob_file_starting_level, 1); ASSERT_EQ(new_cf_opt.prepopulate_blob_cache, PrepopulateBlobCache::kDisable); ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); + ASSERT_EQ(new_cf_opt.default_write_temperature, Temperature::kCold); ASSERT_EQ(new_cf_opt.default_temperature, Temperature::kHot); ASSERT_EQ(new_cf_opt.persist_user_defined_timestamps, true); ASSERT_EQ(new_cf_opt.memtable_max_range_deletions, 0); @@ -2350,6 +2352,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { {"blob_file_starting_level", "1"}, {"prepopulate_blob_cache", "kDisable"}, {"last_level_temperature", "kWarm"}, + {"default_write_temperature", "kCold"}, {"default_temperature", "kHot"}, {"persist_user_defined_timestamps", "true"}, {"memtable_max_range_deletions", "0"}, @@ -2502,6 +2505,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.blob_file_starting_level, 1); ASSERT_EQ(new_cf_opt.prepopulate_blob_cache, PrepopulateBlobCache::kDisable); ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); + 
ASSERT_EQ(new_cf_opt.default_write_temperature, Temperature::kCold); ASSERT_EQ(new_cf_opt.default_temperature, Temperature::kHot); ASSERT_EQ(new_cf_opt.persist_user_defined_timestamps, true); ASSERT_EQ(new_cf_opt.memtable_max_range_deletions, 0);