Skip to content

Commit

Permalink
Repro + hack fix
Browse files Browse the repository at this point in the history
  • Loading branch information
hx235 committed Oct 22, 2023
1 parent 543191f commit e70f7ce
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 1 deletion.
1 change: 1 addition & 0 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@ Status DBImpl::CloseHelper() {
// continuing with the shutdown
mutex_.Lock();
shutdown_initiated_ = true;
// error_handler_.SetDBShutdownInProgress();
error_handler_.CancelErrorRecovery();
while (error_handler_.IsRecoveryInProgress()) {
bg_cv_.Wait();
Expand Down
4 changes: 4 additions & 0 deletions db/error_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,9 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
return bg_error_;
} else if (io_error.ok()) {
return kOkStatus;
// } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_
// ||
// db_shutdown_in_progress_) {
} else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
// Auto resume BG error is not enabled, directly return bg_error_.
return bg_error_;
Expand Down Expand Up @@ -819,6 +822,7 @@ void ErrorHandler::EndAutoRecovery() {
old_recovery_thread->join();
db_mutex_->Lock();
}
TEST_SYNC_POINT("BeforeEndAutoRecoveryFinish3");
return;
}

Expand Down
6 changes: 6 additions & 0 deletions db/error_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ class ErrorHandler {

const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);

// Placeholder for marking that the DB is shutting down. As written this is a
// no-op: the only statement in the body is commented out, so calling it
// changes no state.
// NOTE(review): presumably meant to be called from the DB close path so that
// new auto-recovery attempts can be refused during shutdown -- confirm, and
// enable together with the db_shutdown_in_progress_ member.
void SetDBShutdownInProgress() {
// Fix is here!
//
// db_shutdown_in_progress_ = true;
}
// Returns a copy of bg_error_.
Status GetBGError() const { return bg_error_; }

// Returns a copy of recovery_error_.
Status GetRecoveryError() const { return recovery_error_; }
Expand Down Expand Up @@ -113,6 +118,7 @@ class ErrorHandler {
// is updated while holding db mutex.
bool recovery_disabled_file_deletion_;

// bool db_shutdown_in_progress_;
const Status& HandleKnownErrors(const Status& bg_err,
BackgroundErrorReason reason);
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
Expand Down
41 changes: 40 additions & 1 deletion db/error_handler_fs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <iostream>

#include "db/db_test_util.h"
#include "file/sst_file_manager_impl.h"
#include "port/stack_trace.h"
#include "rocksdb/io_status.h"
#include "rocksdb/sst_file_manager.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "util/random.h"
#include "utilities/fault_injection_env.h"
#include "utilities/fault_injection_fs.h"

namespace ROCKSDB_NAMESPACE {

class DBErrorHandlingFSTest : public DBTestBase {
Expand Down Expand Up @@ -2473,6 +2475,43 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAbortRecovery) {
Destroy(options);
}

// Repro: a retryable IO error is injected during flush, and while the error
// is being handled the DB is reopened (and therefore closed) from another
// thread.
//
// Sequence driven by sync points:
//  1. "BuildTable:BeforeOutputValidation": on the first hit only, the fault
//     injection filesystem is deactivated with a retryable IOError.
//  2. "BuildTable:BeforeDeleteFile": the filesystem is re-activated, a
//     dependency between "BeforeEndAutoRecoveryFinish3" and
//     "StartRecoverFromRetryableBGIOError::in_progress" is loaded, and
//     Reopen() is started on a helper thread.
//
// The test expects Flush() to fail and the helper thread to finish.
TEST_F(DBErrorHandlingFSTest, FlushReadErrorWithReopen) {
  std::vector<port::Thread> threads;

  Options options = GetDefaultOptions();
  options.env = fault_env_.get();
  options.create_if_missing = true;
  DestroyAndReopen(options);

  ASSERT_OK(Put("k1", "val"));
  bool error_set = false;
  SyncPoint::GetInstance()->SetCallBack(
      "BuildTable:BeforeOutputValidation", [&](void*) {
        // Inject the failure only once so later table builds can succeed.
        if (error_set) {
          return;
        }
        IOStatus st = IOStatus::IOError();
        st.SetRetryable(true);
        fault_fs_->SetFilesystemActive(false, st);
        error_set = true;
      });
  SyncPoint::GetInstance()->SetCallBack(
      "BuildTable:BeforeDeleteFile", [&](void*) {
        fault_fs_->SetFilesystemActive(true);
        ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
            {{"BeforeEndAutoRecoveryFinish3",
              "StartRecoverFromRetryableBGIOError::in_progress"}});
        threads.push_back(port::Thread([&]() { Reopen(options); }));
      });

  SyncPoint::GetInstance()->EnableProcessing();
  Status s = Flush();

  // Join every helper thread BEFORE any ASSERT_* can return from the test:
  // destroying a joinable thread terminates the process, which would hide the
  // real failure. Iterating (instead of indexing threads[0]) is also safe when
  // the callback never fired or fired more than once.
  for (auto& t : threads) {
    t.join();
  }
  SyncPoint::GetInstance()->DisableProcessing();
  // The callbacks capture locals of this test by reference; drop them so they
  // cannot be invoked after those locals go out of scope.
  SyncPoint::GetInstance()->ClearAllCallBacks();

  ASSERT_NOK(s);
  // The repro is only meaningful if the Reopen() thread was actually started.
  ASSERT_FALSE(threads.empty());
  Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, FlushReadError) {
std::shared_ptr<ErrorHandlerFSListener> listener =
std::make_shared<ErrorHandlerFSListener>();
Expand Down

0 comments on commit e70f7ce

Please sign in to comment.