diff --git a/include/libnuraft/context.hxx b/include/libnuraft/context.hxx index c5c70f90..e880a8d9 100644 --- a/include/libnuraft/context.hxx +++ b/include/libnuraft/context.hxx @@ -55,15 +55,34 @@ public: , params_( cs_new(params) ) {} + /** + * Register an event callback function. + * + * @param func Callback function to register. + */ void set_cb_func(cb_func::func_type func) { cb_func_ = cb_func(func); } + /** + * Return the pointer to current Raft parameters. + * + * WARNING: + * It is just a pointer so that the contents + * shouldn't be changed directly. + * + * @return Pointer to parameter instance. + */ ptr get_params() const { std::lock_guard l(ctx_lock_); return params_; } + /** + * Update Raft parameters. + * + * @param to New Raft parameters to set. + */ void set_params(ptr& to) { std::lock_guard l(ctx_lock_); params_ = to; @@ -72,31 +91,49 @@ public: __nocopy__(context); public: - // State manager instance. + /** + * State manager instance. + */ ptr state_mgr_; - // State machine instance. + /** + * State machine instance. + */ ptr state_machine_; - // RPC listener instance. + /** + * RPC listener instance. + */ ptr rpc_listener_; - // System logger instance. + /** + * System logger instance. + */ ptr logger_; - // RPC client factory. + /** + * RPC client factory. + */ ptr rpc_cli_factory_; - // Timer instance. + /** + * Timer instance. + */ ptr scheduler_; - // Raft parameters. + /** + * Raft parameters. + */ std::shared_ptr params_; - // Callback function for hooking the operation. + /** + * Callback function for hooking the operation. + */ cb_func cb_func_; - // Lock. + /** + * Lock. + */ mutable std::mutex ctx_lock_; }; diff --git a/include/libnuraft/peer.hxx b/include/libnuraft/peer.hxx index c0840962..036dff8e 100644 --- a/include/libnuraft/peer.hxx +++ b/include/libnuraft/peer.hxx @@ -36,20 +36,28 @@ namespace nuraft { class snapshot; class peer { public: - // Max number of warnings before suppressing it. + /** + * Max number of warnings before suppressing it. + */ static const int32 WARNINGS_LIMIT = 20; - // If a node is not responding more than this limit, - // we treat that node as dead. + /** + * If a node is not responding more than this limit, + * we treat that node as dead. + */ static const int32 RESPONSE_LIMIT = 20; - // If connection is silent longer than this limit - // (multiplied by heartbeat interval), re-establish - // the connection. + /** + * If connection is silent longer than this limit + * (multiplied by heartbeat interval), re-establish + * the connection. + */ static const int32 RECONNECT_LIMIT = 50; - // If removed node is not responding more than this limit, - // just force remove it from server list. + /** + * If removed node is not responding more than this limit, + * just force remove it from server list. + */ static const int32 LEAVE_LIMIT = 5; peer( ptr& config, @@ -314,123 +322,197 @@ private: ptr& resp, ptr& err); - // Information (config) of this server. + /** + * Information (config) of this server. + */ ptr config_; - // Heartbeat scheduler for this server. + /** + * Heartbeat scheduler for this server. + */ ptr scheduler_; - // RPC client to this server. + /** + * RPC client to this server. + */ ptr rpc_; - // Guard of `rpc_`. + /** + * Guard of `rpc_`. + */ std::mutex rpc_protector_; - // Current heartbeat interval after adding back-off. + /** + * Current heartbeat interval after adding back-off. + */ std::atomic current_hb_interval_; - // Original heartbeat interval. + /** + * Original heartbeat interval. + */ int32 hb_interval_; - // RPC backoff. + /** + * RPC backoff. + */ int32 rpc_backoff_; - // Upper limit of heartbeat interval. + /** + * Upper limit of heartbeat interval. + */ int32 max_hb_interval_; - // Next log index of this server. + /** + * Next log index of this server. + */ std::atomic next_log_idx_; - // Hint of the next log batch size in bytes. + /** + * Hint of the next log batch size in bytes. + */ std::atomic next_batch_size_hint_in_bytes_; - // The last log index whose term matches up with the leader. + /** + * The last log index whose term matches up with the leader. + */ ulong matched_idx_; - // `true` if we sent message to this server and waiting for - // the response. + /** + * `true` if we sent message to this server and waiting for + * the response. + */ std::atomic busy_flag_; - // `true` if we need to send follow-up request immediately - // for commiting logs. + /** + * `true` if we need to send follow-up request immediately + * for commiting logs. + */ std::atomic pending_commit_flag_; - // `true` if heartbeat is enabled. + /** + * `true` if heartbeat is enabled. + */ bool hb_enabled_; - // Heartbeat task. + /** + * Heartbeat task. + */ ptr hb_task_; - // Snapshot context if snapshot transmission is in progress. + /** + * Snapshot context if snapshot transmission is in progress. + */ ptr snp_sync_ctx_; - // Lock for this peer. + /** + * Lock for this peer. + */ std::mutex lock_; // --- For tracking long pause --- - // Timestamp when the last request was sent. + /** + * Timestamp when the last request was sent. + */ timer_helper last_sent_timer_; - // Timestamp when the last (successful) response was received. + /** + * Timestamp when the last (successful) response was received. + */ timer_helper last_resp_timer_; - // Timestamp when the last active network activity was detected. + /** + * Timestamp when the last active network activity was detected. + */ timer_helper last_active_timer_; - // Counter of long pause warnings. + /** + * Counter of long pause warnings. + */ std::atomic long_pause_warnings_; - // Counter of recoveries after long pause. + /** + * Counter of recoveries after long pause. + */ std::atomic network_recoveries_; - // `true` if user manually clear the `busy_flag_` before - // getting response from this server. + /** + * `true` if user manually clear the `busy_flag_` before + * getting response from this server. + */ std::atomic manual_free_; - // For tracking RPC error. + /** + * For tracking RPC error. + */ std::atomic rpc_errs_; - // Start log index of the last sent append entries request. + /** + * Start log index of the last sent append entries request. + */ std::atomic last_sent_idx_; - // Number of count where start log index is the same as previous. + /** + * Number of count where start log index is the same as previous. + */ std::atomic cnt_not_applied_; - // True if leave request has been sent to this peer. + /** + * `true` if leave request has been sent to this peer. + */ std::atomic leave_requested_; - // Number of HB timeout after leave requested. + /** + * Number of HB timeout after leave requested. + */ std::atomic hb_cnt_since_leave_; - // True if this peer responded to leave request so that - // will be removed from cluster soon. - // To avoid HB timer trying to do something with this peer. + /** + * `true` if this peer responded to leave request so that + * will be removed from cluster soon. + * To avoid HB timer trying to do something with this peer. + */ std::atomic stepping_down_; - // For re-connection. + /** + * For re-connection. + */ std::atomic reconn_scheduled_; - // Back-off timer to avoid superfluous reconnection. + /** + * Back-off timer to avoid superfluous reconnection. + */ timer_helper reconn_timer_; - // For exp backoff of reconnection. + /** + * For exp backoff of reconnection. + */ timer_helper reconn_backoff_; - // If `true`, we will lower the log level of the RPC error - // from this server. + /** + * If `true`, we will lower the log level of the RPC error + * from this server. + */ std::atomic suppress_following_error_; - // if `true`, this peer is removed and shut down. - // All operations on this peer should be rejected. + /** + * if `true`, this peer is removed and shut down. + * All operations on this peer should be rejected. + */ std::atomic abandoned_; - // Reserved message that should be sent next time. + /** + * Reserved message that should be sent next time. + */ ptr rsv_msg_; - // Handler for reserved message. + /** + * Handler for reserved message. + */ rpc_handler rsv_msg_handler_; - // Logger instance. + /** + * Logger instance. + */ ptr l_; }; diff --git a/include/libnuraft/raft_server.hxx b/include/libnuraft/raft_server.hxx index fcc560eb..96927c2b 100644 --- a/include/libnuraft/raft_server.hxx +++ b/include/libnuraft/raft_server.hxx @@ -620,258 +620,396 @@ protected: protected: static const int default_snapshot_sync_block_size; - // (Read-only) - // Background thread for commit and snapshot. + /** + * (Read-only) + * Background thread for commit and snapshot. + */ std::thread bg_commit_thread_; - // (Read-only) - // Background thread for sending quick append entry request. + /** + * (Read-only) + * Background thread for sending quick append entry request. + */ std::thread bg_append_thread_; - // Condition variable to invoke append thread. + /** + * Condition variable to invoke append thread. + */ EventAwaiter* bg_append_ea_; - // `true` if this server is ready to serve operation. + /** + * `true` if this server is ready to serve operation. + */ std::atomic initialized_; - // Current leader ID. - // If leader currently does not exist, it will be -1. + /** + * Current leader ID. + * If leader currently does not exist, it will be -1. + */ std::atomic leader_; - // (Read-only) - // ID of this server. + /** + * (Read-only) + * ID of this server. + */ int32 id_; - // Current priority of this server, protected by `lock_`. + /** + * Current priority of this server, protected by `lock_`. + */ int32 my_priority_; - // Current target priority for vote, protected by `lock_`. + /** + * Current target priority for vote, protected by `lock_`. + */ int32 target_priority_; - // Timer that will be reset on `target_priority_` change. + /** + * Timer that will be reset on `target_priority_` change. + */ timer_helper priority_change_timer_; - // Number of servers responded my vote request, protected by `lock_`. + /** + * Number of servers responded my vote request, protected by `lock_`. + */ int32 votes_responded_; - // Number of servers voted for me, protected by `lock_`. + /** + * Number of servers voted for me, protected by `lock_`. + */ int32 votes_granted_; - // Last pre-committed index. + /** + * Last pre-committed index. + */ std::atomic precommit_index_; - // Leader commit index, seen by this node last time. - // Only valid when the current role is `follower`. + /** + * Leader commit index, seen by this node last time. + * Only valid when the current role is `follower`. + */ std::atomic leader_commit_index_; - // Target commit index. - // This value will be basically the same as `leader_commit_index_`. - // However, if the current role is `follower` and this node's log - // is far behind leader so that requires further catch-up, this - // value can be adjusted to the last log index number of the current - // node, which might be smaller than `leader_commit_index_` value. + /** + * Target commit index. + * This value will be basically the same as `leader_commit_index_`. + * However, if the current role is `follower` and this node's log + * is far behind leader so that requires further catch-up, this + * value can be adjusted to the last log index number of the current + * node, which might be smaller than `leader_commit_index_` value. + */ std::atomic quick_commit_index_; - // Actual commit index of state machine. + /** + * Actual commit index of state machine. + */ std::atomic sm_commit_index_; - // (Read-only) - // Initial commit index when this server started. + /** + * (Read-only) + * Initial commit index when this server started. + */ ulong initial_commit_index_; - // `true` if this server is seeing alive leader. + /** + * `true` if this server is seeing alive leader. + */ std::atomic hb_alive_; - // Current status of pre-vote, protected by `lock_`. + /** + * Current status of pre-vote, protected by `lock_`. + */ pre_vote_status_t pre_vote_; - // `false` if currently leader election is not in progress, - // protected by `lock_`. + /** + * `false` if currently leader election is not in progress, + * protected by `lock_`. + */ bool election_completed_; - // `true` if there is uncommitted config, which will - // reject the configuration change. - // Protected by `lock_`. + /** + * `true` if there is uncommitted config, which will + * reject the configuration change. + * Protected by `lock_`. + */ bool config_changing_; - // `true` if this server falls behind leader so that - // catching up the latest log. It will not receive - // normal `append_entries` request while in catch-up status. + /** + * `true` if this server falls behind leader so that + * catching up the latest log. It will not receive + * normal `append_entries` request while in catch-up status. + */ std::atomic catching_up_; - // `true` if this server receives out of log range message - // from leader. Once this flag is set, this server will not - // initiate leader election. + /** + * `true` if this server receives out of log range message + * from leader. Once this flag is set, this server will not + * initiate leader election. + */ std::atomic out_of_log_range_; - // `true` if this is a follower and its committed log index is close enough - // to the leader's committed log index, so the data is fresh enough. + /** + * `true` if this is a follower and its committed log index is close enough + * to the leader's committed log index, so the data is fresh enough. + */ std::atomic data_fresh_; - // `true` if this server is terminating. - // Will not accept any request. + /** + * `true` if this server is terminating. + * Will not accept any request. + */ std::atomic stopping_; - // `true` if background commit thread has been terminated. + /** + * `true` if background commit thread has been terminated. + */ std::atomic commit_bg_stopped_; - // `true` if background append thread has been terminated. + /** + * `true` if background append thread has been terminated. + */ std::atomic append_bg_stopped_; - // `true` if write operation is paused, as the first phase of - // leader re-election. + /** + * `true` if write operation is paused, as the first phase of + * leader re-election. + */ std::atomic write_paused_; - // Server ID indicates the candidate for the next leader, - // as a part of leadership takeover task. + /** + * Server ID indicates the candidate for the next leader, + * as a part of leadership takeover task. + */ std::atomic next_leader_candidate_; - // Timer that will start at pausing write. + /** + * Timer that will start at pausing write. + */ timer_helper reelection_timer_; - // (Read-only) - // `true` if this server is a learner. Will not participate - // leader election. + /** + * (Read-only) + * `true` if this server is a learner. Will not participate + * leader election. + */ bool im_learner_; - // `true` if this server is in the middle of - // `append_entries` handler. + /** + * `true` if this server is in the middle of + * `append_entries` handler. + */ std::atomic serving_req_; - // Number of steps remaining to turn off this server. - // Will be triggered once this server is removed from the cluster. - // Protected by `lock_`. + /** + * Number of steps remaining to turn off this server. + * Will be triggered once this server is removed from the cluster. + * Protected by `lock_`. + */ int32 steps_to_down_; - // `true` if this server is creating a snapshot. + /** + * `true` if this server is creating a snapshot. + */ std::atomic snp_in_progress_; - // (Read-only, but its contents will change) - // Server context. + /** + * (Read-only, but its contents will change) + * Server context. + */ std::unique_ptr ctx_; - // Scheduler. + /** + * Scheduler. + */ ptr scheduler_; - // Election timeout handler. + /** + * Election timeout handler. + */ timer_task::executor election_exec_; - // Election timer. + /** + * Election timer. + */ ptr election_task_; - // The time when the election timer was reset last time. + /** + * The time when the election timer was reset last time. + */ timer_helper last_election_timer_reset_; - // Map of {Server ID, `peer` instance}, - // protected by `lock_`. + /** + * Map of {Server ID, `peer` instance}, + * protected by `lock_`. + */ std::unordered_map> peers_; - // Map of {server ID, connection to corresponding server}, - // protected by `lock_`. + /** + * Map of {server ID, connection to corresponding server}, + * protected by `lock_`. + */ std::unordered_map> rpc_clients_; - // Current role of this server. + /** + * Current role of this server. + */ std::atomic role_; - // (Read-only, but its contents will change) - // Server status (term and vote). + /** + * (Read-only, but its contents will change) + * Server status (term and vote). + */ ptr state_; - // (Read-only) - // Log store instance. + /** + * (Read-only) + * Log store instance. + */ ptr log_store_; - // (Read-only) - // State machine instance. + /** + * (Read-only) + * State machine instance. + */ ptr state_machine_; - // `true` if this server is receiving a snapshot. + /** + * `true` if this server is receiving a snapshot. + */ std::atomic receiving_snapshot_; - // Election timeout count while receiving snapshot. - // This happens when the sender (i.e., leader) is too slow - // so that cannot send message before election timeout. + /** + * Election timeout count while receiving snapshot. + * This happens when the sender (i.e., leader) is too slow + * so that cannot send message before election timeout. + */ std::atomic et_cnt_receiving_snapshot_; - // (Read-only) - // Logger instance. + /** + * (Read-only) + * Logger instance. + */ ptr l_; - // (Read-only) - // Random generator for timeout. + /** + * (Read-only) + * Random generator for timeout. + */ std::function rand_timeout_; - // Previous config for debugging purpose, protected by `config_lock_`. + /** + * Previous config for debugging purpose, protected by `config_lock_`. + */ ptr stale_config_; - // Current (committed) cluster config, protected by `config_lock_`. + /** + * Current (committed) cluster config, protected by `config_lock_`. + */ ptr config_; - // Lock for cluster config. + /** + * Lock for cluster config. + */ mutable std::mutex config_lock_; - // Latest uncommitted cluster config changed from `config_`, - // protected by `lock_`. `nullptr` if `config_` is the latest one. + /** + * Latest uncommitted cluster config changed from `config_`, + * protected by `lock_`. `nullptr` if `config_` is the latest one. + */ ptr uncommitted_config_; - // Server that is preparing to join, - // protected by `lock_`. + /** + * Server that is preparing to join, + * protected by `lock_`. + */ ptr srv_to_join_; - // Server that is agreed to leave, - // protected by `lock_`. + /** + * Server that is agreed to leave, + * protected by `lock_`. + */ ptr srv_to_leave_; - // Target log index number containing the config that - // this server is actually removed. - // Connection to `srv_to_leave_` should be kept until this log. + /** + * Target log index number containing the config that + * this server is actually removed. + * Connection to `srv_to_leave_` should be kept until this log. + */ ulong srv_to_leave_target_idx_; - // Config of the server preparing to join, - // protected by `lock_`. + /** + * Config of the server preparing to join, + * protected by `lock_`. + */ ptr conf_to_add_; - // Lock of entire Raft operation. + /** + * Lock of entire Raft operation. + */ std::recursive_mutex lock_; - // Lock of handling client request and role change. + /** + * Lock of handling client request and role change. + */ std::mutex cli_lock_; - // Condition variable to invoke BG commit thread. + /** + * Condition variable to invoke BG commit thread. + */ std::condition_variable commit_cv_; - // Lock for `commit_cv_`. + /** + * Lock for `commit_cv_`. + */ std::mutex commit_cv_lock_; - // Lock for auto forwarding. + /** + * Lock for auto forwarding. + */ std::mutex rpc_clients_lock_; - // Client requests waiting for replication. - // Only used in blocking mode. + /** + * Client requests waiting for replication. + * Only used in blocking mode. + */ std::map> commit_ret_elems_; - // Lock for `commit_ret_elems_`. + /** + * Lock for `commit_ret_elems_`. + */ std::mutex commit_ret_elems_lock_; - // Condition variable to invoke Raft server for - // notifying the termination of BG commit thread. + /** + * Condition variable to invoke Raft server for + * notifying the termination of BG commit thread. + */ std::condition_variable ready_to_stop_cv_; - // Lock for `read_to_stop_cv_`. + /** + * Lock for `read_to_stop_cv_`. + */ std::mutex ready_to_stop_cv_lock_; - // (Read-only) - // Response handler. + /** + * (Read-only) + * Response handler. + */ rpc_handler resp_handler_; - // (Read-only) - // Extended response handler. + /** + * (Read-only) + * Extended response handler. + */ rpc_handler ex_resp_handler_; - // Last snapshot instance. + /** + * Last snapshot instance. + */ ptr last_snapshot_; - // Lock for `last_snapshot_`. + /** + * Lock for `last_snapshot_`. + */ mutable std::mutex last_snapshot_lock_; }; diff --git a/include/libnuraft/srv_config.hxx b/include/libnuraft/srv_config.hxx index 43a24a65..6dd80ddc 100644 --- a/include/libnuraft/srv_config.hxx +++ b/include/libnuraft/srv_config.hxx @@ -82,27 +82,39 @@ public: ptr serialize() const; private: - // ID of this server, should be positive number. + /** + * ID of this server, should be positive number. + */ int32 id_; - // ID of datacenter where this server is located. - // 0 if not used. + /** + * ID of datacenter where this server is located. + * 0 if not used. + */ int32 dc_id_; - // Endpoint (address + port). + /** + * Endpoint (address + port). + */ std::string endpoint_; - // Custom string given by user. - // WARNING: It SHOULD NOT contain NULL character, - // as it will be stored as a C-style string. + /** + * Custom string given by user. + * WARNING: It SHOULD NOT contain NULL character, + * as it will be stored as a C-style string. + */ std::string aux_; - // `true` if this node is learner. - // Learner will not initiate or participate in leader election. + /** + * `true` if this node is learner. + * Learner will not initiate or participate in leader election. + */ bool learner_; - // Priority of this node. - // 0 will never be a leader. + /** + * Priority of this node. + * 0 will never be a leader. + */ int32 priority_; };