Skip to content

Commit

Permalink
fix(lua): Fix a deadlock happenning when calling a lua script
Browse files Browse the repository at this point in the history
The scenario is described in a unit test that reproduces the issue with high chance.
Also added dragonfly_test in repeat=100 mode to CI.

Signed-off-by: Roman Gershman <[email protected]>
  • Loading branch information
romange committed Jan 25, 2023
1 parent 59bfeca commit 470f81e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ jobs:
echo Run ctest -V -L DFLY
#GLOG_logtostderr=1 GLOG_vmodule=transaction=1,engine_shard_set=1
GLOG_logtostderr=1 GLOG_vmodule=rdb_load=1,rdb_save=1,snapshot=1 ctest -V -L DFLY
./dragonfly_test --mem_defrag_threshold=0.05 # trying to catch issue with defrag
./dragonfly_test --gtest_repeat=100
# GLOG_logtostderr=1 GLOG_vmodule=transaction=1,engine_shard_set=1 CTEST_OUTPUT_ON_FAILURE=1 ninja server/test
lint-test-chart:
runs-on: ubuntu-latest
Expand Down
25 changes: 25 additions & 0 deletions src/server/dragonfly_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,31 @@ TEST_F(DflyEngineTest, EvalBug713) {
fb0.Join();
}

// Tests deadlock because trans->Schedule was before interpreter->Lock() has been called.
// Problematic scenario:
// 1. transaction 1 schedules itself and blocks on an interpreter lock
// 2. transaction 2 schedules itself, but meanwhile an interpreter unlocks itself and
// transaction 2 grabs the lock but can not progress due to transaction 1 already
// scheduled before.
TEST_F(DflyEngineTest, EvalBug713b) {
const char* script = "return redis.call('get', KEYS[1])";

const uint32_t kNumFibers = 20;
fibers_ext::Fiber fibers[kNumFibers];

for (unsigned j = 0; j < kNumFibers; ++j) {
fibers[j] = pp_->at(1)->LaunchFiber([=, this] {
for (unsigned i = 0; i < 50; ++i) {
Run(StrCat("fb", j), {"eval", script, "3", kKeySid0, kKeySid1, kKeySid2});
}
});
}

for (unsigned j = 0; j < kNumFibers; ++j) {
fibers[j].Join();
}
}

TEST_F(DflyEngineTest, EvalSha) {
auto resp = Run({"script", "load", "return 5"});
EXPECT_THAT(resp, ArgType(RespExpr::STRING));
Expand Down
8 changes: 5 additions & 3 deletions src/server/main_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,8 @@ void Service::Unwatch(CmdArgList args, ConnectionContext* cntx) {

void Service::CallFromScript(CmdArgList args, ObjectExplorer* reply, ConnectionContext* cntx) {
DCHECK(cntx->transaction);
DVLOG(1) << "CallFromScript " << cntx->transaction->DebugId() << " " << ArgS(args, 0);

InterpreterReplier replier(reply);
facade::SinkReplyBuilder* orig = cntx->Inject(&replier);

Expand Down Expand Up @@ -1013,11 +1015,11 @@ void Service::EvalInternal(const EvalArgs& eval_args, Interpreter* interpreter,
}
DCHECK(cntx->transaction);

auto lk = interpreter->Lock();

if (!eval_args.keys.empty())
cntx->transaction->Schedule();

auto lk = interpreter->Lock();

interpreter->SetGlobalArray("KEYS", eval_args.keys);
interpreter->SetGlobalArray("ARGV", eval_args.args);
interpreter->SetRedisFunc(
Expand All @@ -1039,12 +1041,12 @@ void Service::EvalInternal(const EvalArgs& eval_args, Interpreter* interpreter,
CHECK(result == Interpreter::RUN_OK);

EvalSerializer ser{static_cast<RedisReplyBuilder*>(cntx->reply_builder())};

if (!interpreter->IsResultSafe()) {
(*cntx)->SendError("reached lua stack limit");
} else {
interpreter->SerializeResult(&ser);
}

interpreter->ResetStack();
}

Expand Down
2 changes: 1 addition & 1 deletion src/server/transaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ pair<bool, bool> Transaction::ScheduleInShard(EngineShard* shard) {
// All transactions in the queue must acquire the intent lock.
lock_granted = shard->db_slice().Acquire(mode, lock_args) && shard_unlocked;
sd.local_mask |= KEYLOCK_ACQUIRED;
DVLOG(1) << "Lock granted " << lock_granted << " for trans " << DebugId();
DVLOG(2) << "Lock granted " << lock_granted << " for trans " << DebugId();
}

if (!txq->Empty()) {
Expand Down

0 comments on commit 470f81e

Please sign in to comment.