From 1268cba72a2e73373de2f525b6bf0256e9c88aae Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jan 2022 15:17:59 +0300 Subject: [PATCH 1/3] Fix bug which lead to out of memory --- src/handle_commit.cxx | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/handle_commit.cxx b/src/handle_commit.cxx index d34722ac..9052561e 100644 --- a/src/handle_commit.cxx +++ b/src/handle_commit.cxx @@ -315,14 +315,27 @@ void raft_server::commit_app_log(ulong idx_to_commit, std::list< ptr > async_elems; if (need_to_handle_commit_elem) { std::unique_lock cre_lock(commit_ret_elems_lock_); - bool match_found = false; + /// Sometimes user can batch requests to RAFT: for example send 30 + /// append entries requests in a single batch. For such request batch + /// user will receive a single response: all was successful or all + /// failed. Obviously we don't need to add info about responses + /// (commit_ret_elems) for 29 requests from batch and need to do it only + /// for 30-th request. precommit_index is exact value which identify ID + /// of the last request from the latest batch. So if we commiting this + /// last request and for some reason it was not added into + /// commit_ret_elems in the handle_cli_req method (logical race + /// condition) we have to add it here. Otherwise we don't need to add + /// anything into commit_ret_elems_, because nobody will wait for the + /// responses of the intermediate requests from requests batch. + bool need_to_wait_commit_ret = sm_idx == pc_idx; + auto entry = commit_ret_elems_.find(sm_idx); if (entry != commit_ret_elems_.end()) { ptr elem = entry->second; if (elem->idx_ == sm_idx) { elem->result_code_ = cmd_result_code::OK; elem->ret_value_ = ret_value; - match_found = true; + need_to_wait_commit_ret = false; p_dv("notify cb %ld %p", sm_idx, &elem->awaiter_); switch (ctx_->get_params()->return_method_) { @@ -341,7 +354,7 @@ void raft_server::commit_app_log(ulong idx_to_commit, } } - if (!match_found && !initial_commit_exec) { + if (need_to_wait_commit_ret && !initial_commit_exec) { // If not found, commit thread is invoked earlier than user thread. // Create one here. ptr elem = cs_new(); From 5e2b6f6506fb1441ccc3c015486d05c666760206 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jan 2022 15:47:53 +0300 Subject: [PATCH 2/3] Followup --- include/libnuraft/raft_server.hxx | 2 +- src/handle_client_request.cxx | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/libnuraft/raft_server.hxx b/include/libnuraft/raft_server.hxx index a959b3e3..810670e7 100644 --- a/include/libnuraft/raft_server.hxx +++ b/include/libnuraft/raft_server.hxx @@ -727,7 +727,7 @@ protected: ptr handle_cli_req_callback(ptr elem, ptr resp); ptr< cmd_result< ptr > > - handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res); + handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res, uint64_t elem_idx); void drop_all_pending_commit_elems(); diff --git a/src/handle_client_request.cxx b/src/handle_client_request.cxx index 7c54f9c7..22cf6f0c 100644 --- a/src/handle_client_request.cxx +++ b/src/handle_client_request.cxx @@ -157,7 +157,7 @@ ptr raft_server::handle_cli_req(req_msg& req) { resp->set_async_cb ( std::bind( &raft_server::handle_cli_req_callback_async, this, - elem->async_result_ ) ); + elem->async_result_, elem->idx_ ) ); break; } } @@ -214,9 +214,18 @@ ptr raft_server::handle_cli_req_callback(ptr elem, } ptr< cmd_result< ptr > > - raft_server::handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res) + raft_server::handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res, uint64_t elem_idx) { async_res->accept(); + /// If element was added into commit_ret_elems_ from commit thread we have + /// to remove it here to avoid memory leak. Otherwise it will be already + /// removed. + { auto_lock(commit_ret_elems_lock_); + auto it = commit_ret_elems_.find(elem_idx) + if (it != commit_ret_elems_.end()) { + commit_ret_elems_.erase(it); + } + } return async_res; } From 7c6c696134706eeef274787c9282e40416dc03e8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jan 2022 15:55:37 +0300 Subject: [PATCH 3/3] Revert "Followup" This reverts commit 5e2b6f6506fb1441ccc3c015486d05c666760206. --- include/libnuraft/raft_server.hxx | 2 +- src/handle_client_request.cxx | 13 ++----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/include/libnuraft/raft_server.hxx b/include/libnuraft/raft_server.hxx index 810670e7..a959b3e3 100644 --- a/include/libnuraft/raft_server.hxx +++ b/include/libnuraft/raft_server.hxx @@ -727,7 +727,7 @@ protected: ptr handle_cli_req_callback(ptr elem, ptr resp); ptr< cmd_result< ptr > > - handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res, uint64_t elem_idx); + handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res); void drop_all_pending_commit_elems(); diff --git a/src/handle_client_request.cxx b/src/handle_client_request.cxx index 22cf6f0c..7c54f9c7 100644 --- a/src/handle_client_request.cxx +++ b/src/handle_client_request.cxx @@ -157,7 +157,7 @@ ptr raft_server::handle_cli_req(req_msg& req) { resp->set_async_cb ( std::bind( &raft_server::handle_cli_req_callback_async, this, - elem->async_result_, elem->idx_ ) ); + elem->async_result_ ) ); break; } } @@ -214,18 +214,9 @@ ptr raft_server::handle_cli_req_callback(ptr elem, } ptr< cmd_result< ptr > > - raft_server::handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res, uint64_t elem_idx) + raft_server::handle_cli_req_callback_async(ptr< cmd_result< ptr > > async_res) { async_res->accept(); - /// If element was added into commit_ret_elems_ from commit thread we have - /// to remove it here to avoid memory leak. Otherwise it will be already - /// removed. - { auto_lock(commit_ret_elems_lock_); - auto it = commit_ret_elems_.find(elem_idx) - if (it != commit_ret_elems_.end()) { - commit_ret_elems_.erase(it); - } - } return async_res; }