From 50fc35aaa0cf187ed7d2e47e785801f83cb93077 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Mon, 23 Oct 2023 19:45:01 -0700 Subject: [PATCH] Fix race between flush error recovery VS db destruction --- db/error_handler.cc | 8 ++++ db/error_handler_fs_test.cc | 47 +++++++++++++++++++ .../flush_recovery_db_destructor_race.md | 1 + 3 files changed, 56 insertions(+) create mode 100644 unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md diff --git a/db/error_handler.cc b/db/error_handler.cc index 95b9a0fe6a69..21c3a686f576 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -645,6 +645,13 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) { // Auto resume BG error is not enabled, directly return bg_error_. return bg_error_; + } else if (end_recovery_) { + // Can temporarily release db mutex + EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_, + Status::ShutdownInProgress(), + db_mutex_); + db_mutex_->AssertHeld(); + return bg_error_; } if (bg_error_stats_ != nullptr) { RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT); @@ -819,6 +826,7 @@ void ErrorHandler::EndAutoRecovery() { old_recovery_thread->join(); db_mutex_->Lock(); } + TEST_SYNC_POINT("PostEndAutoRecovery"); return; } diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index bbff8c7fe38b..d627b29b19b7 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -13,6 +13,7 @@ #include "rocksdb/io_status.h" #include "rocksdb/sst_file_manager.h" #include "test_util/sync_point.h" +#include "test_util/testharness.h" #include "util/random.h" #include "utilities/fault_injection_env.h" #include "utilities/fault_injection_fs.h" @@ -2473,6 +2474,52 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAbortRecovery) { Destroy(options); } +TEST_F(DBErrorHandlingFSTest, FlushErrorRecoveryRaceWithDBDestruction) { + Options options = GetDefaultOptions(); + options.env = fault_env_.get(); + options.create_if_missing = true; + DestroyAndReopen(options); + ASSERT_OK(Put("k1", "val")); + + // Inject retryable flush error + bool error_set = false; + SyncPoint::GetInstance()->SetCallBack( + "BuildTable:BeforeOutputValidation", [&](void*) { + if (error_set) { + return; + } + IOStatus st = IOStatus::IOError("Injected"); + st.SetRetryable(true); + fault_fs_->SetFilesystemActive(false, st); + error_set = true; + }); + + port::Thread db_close_thread; + SyncPoint::GetInstance()->SetCallBack( + "BuildTable:BeforeDeleteFile", [&](void*) { + // Clear retryable flush error injection + fault_fs_->SetFilesystemActive(true); + + // Coerce race between ending auto recovery in db destruction and flush + // error recovery + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"PostEndAutoRecovery", + "StartRecoverFromRetryableBGIOError::in_progress"}}); + db_close_thread = port::Thread([&]() { Close(); }); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + Status s = Flush(); + ASSERT_NOK(s); + // Prior to the fix, the db close will crash due to the recovery thread for + // flush error is not joined by the time of destruction. + db_close_thread.join(); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + Destroy(options); +} + TEST_F(DBErrorHandlingFSTest, FlushReadError) { std::shared_ptr listener = std::make_shared(); diff --git a/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md b/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md new file mode 100644 index 000000000000..76cc3c72133e --- /dev/null +++ b/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md @@ -0,0 +1 @@ +Fix a race between flush error recovery and db destruction that can lead to db crashing.