Vault 6815/respect min quorum #23
Changes from 5 commits
@@ -4,6 +4,7 @@ import (
	"fmt"
	"sort"

	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/raft"
)

@@ -22,7 +23,7 @@ func (a *Autopilot) reconcile() error {
	state := a.GetState()

	if state == nil || state.Leader == "" {
		return fmt.Errorf("Cannote reconcile Raft server voting rights without a valid autopilot state")
		return fmt.Errorf("cannot reconcile Raft server voting rights without a valid autopilot state")
	}

	// have the promoter calculate the required Raft changeset.

@@ -104,7 +105,7 @@ func (a *Autopilot) applyPromotions(state *State, changes RaftChanges) (bool, er

	// when we promoted anything we return true to indicate that the promotion/demotion applying
	// process is finished to prevent promotions and demotions in the same round. This is what
	// autopilot within Consul used to do so I am keeping the behavior the same for now.
	// autopilot within Consul used to do, so I am keeping the behavior the same for now.
	return promoted, nil
}

@@ -151,36 +152,45 @@ func (a *Autopilot) applyDemotions(state *State, changes RaftChanges) (bool, err
	return demoted, nil
}

// getFailedServers aggregates all of the information about servers that the consuming application believes are in
// getFailedServers aggregates all the information about servers that the consuming application believes are in
// a failed/left state (indicated by the NodeStatus field on the Server type) as well as stale servers that are
// in the raft configuration but not know to the consuming application. This function will do nothing with
// that information and is purely to collect the data.
func (a *Autopilot) getFailedServers() (*FailedServers, int, error) {
func (a *Autopilot) getFailedServers() (*FailedServers, *VoterRegistry, error) {
	staleRaftServers := make(map[raft.ServerID]raft.Server)
	raftConfig, err := a.getRaftConfiguration()
	if err != nil {
		return nil, 0, err
		return nil, nil, err
	}

	// Populate a map of all the raft servers. We will
	// remove some later on from the map leaving us with
	// just the stale servers.
	var voters int
	registry := NewVoterRegistry()

	for _, server := range raftConfig.Servers {
		staleRaftServers[server.ID] = server

		if server.Suffrage == raft.Voter {
			voters++
		registry.Eligibility[server.ID] = &VoterEligibility{
			currentVoter: server.Suffrage == raft.Voter,
		}
	}

	var failed FailedServers

	for id, srv := range a.delegate.KnownServers() {
		raftSrv, found := staleRaftServers[id]
		if found {
			delete(staleRaftServers, id)
		} else {
			// This server was known to the application,
			// but not in the Raft config, so will be ignored
			continue
		}

		// Update the potential suffrage using the supplied predicate.
		v := registry.Eligibility[id]
		v.SetPotentialVoter(a.promoter.IsPotentialVoter(srv.NodeType))

		if srv.NodeStatus != NodeAlive {
			if found && raftSrv.Suffrage == raft.Voter {
				failed.FailedVoters = append(failed.FailedVoters, srv)

@@ -211,7 +221,7 @@ func (a *Autopilot) getFailedServers() (*FailedServers, int, error) {
		return failed.FailedVoters[i].ID < failed.FailedVoters[j].ID
	})

	return &failed, voters, nil
	return &failed, registry, nil
}

// pruneDeadServers will find stale raft servers and failed servers as indicated by the consuming application

@@ -220,7 +230,7 @@ func (a *Autopilot) getFailedServers() (*FailedServers, int, error) {
// removed first. Then stale voters and finally failed servers. For servers with voting rights we will
// cap the number removed so that we do not remove too many at a time and do not remove nodes to the
// point where the number of voters would be below the MinQuorum value from the autopilot config.
// Additionally the delegate will be consulted to determine if all of the removals should be done and
// Additionally, the delegate will be consulted to determine if all the removals should be done and
// can filter the failed servers listings if need be.
func (a *Autopilot) pruneDeadServers() error {
	if !a.ReconciliationEnabled() {

@@ -234,54 +244,102 @@ func (a *Autopilot) pruneDeadServers() error {

	state := a.GetState()

	failed, voters, err := a.getFailedServers()
	failed, vr, err := a.getFailedServers()
	if err != nil || failed == nil {
		return err
	}

	failed = a.promoter.FilterFailedServerRemovals(conf, state, failed)

	// remove failed non voting servers
	for _, srv := range failed.FailedNonVoters {
		a.logger.Info("Attempting removal of failed server node", "id", srv.ID, "name", srv.Name, "address", srv.Address)
		a.delegate.RemoveFailedServer(srv)
	// Remove servers in order of increasing precedence
	// Update the voter registry after each batch is processed

	// remove stale non-voters
	toRemove := a.adjudicateRemoval(failed.StaleNonVoters, vr)
I guess in principle you could now collapse these 4 calls into 2, e.g. and similarly for the failed servers. Up to you, it's also fine as you have it.
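The example that followed the "e.g." is not captured in this view. As a rough sketch of the idea, assuming a hypothetical helper in the same package (the name pruneStale is not from the PR), each stale batch could become a single call:

```go
// pruneStale is a hypothetical helper, not part of this PR: it adjudicates one
// batch of stale server IDs, removes them from the raft configuration, and
// then drops them from the voter registry.
func (a *Autopilot) pruneStale(ids []raft.ServerID, vr *VoterRegistry) error {
	toRemove := a.adjudicateRemoval(ids, vr)
	if err := a.removeStaleServers(toRemove); err != nil {
		return err
	}
	vr.RemoveAll(toRemove)
	return nil
}
```

pruneDeadServers would then call it once for failed.StaleNonVoters and once for failed.StaleVoters; the two failed-server batches could get an analogous helper wrapping removeFailedServers.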
	if err = a.removeStaleServers(toRemove); err != nil {
		return err
	}
	vr.RemoveAll(toRemove)

	// remove stale non voters
	for _, id := range failed.StaleNonVoters {
		a.logger.Debug("removing stale raft server from configuration", "id", id)
		if err := a.removeServer(id); err != nil {
			return err
		}
	// Remove stale voters
	toRemove = a.adjudicateRemoval(failed.StaleVoters, vr)
	if err = a.removeStaleServers(toRemove); err != nil {
		return err
	}
	vr.RemoveAll(toRemove)

	maxRemoval := (voters - 1) / 2
	// remove failed non-voters
	failedNonVoters := vr.Filter(failed.FailedNonVoters)
What is the purpose of this filter call?

Just to filter the

Which ones do we want? I'm confused because it's just ensuring that these nodes are present in the map, and I'm not sure why that's needed.

We want the collection of server ID (
	toRemove = a.adjudicateRemoval(failedNonVoters, vr)
	a.removeFailedServers(failed.GetFailed(toRemove, false))
	vr.RemoveAll(toRemove)

	// remove failed voters
	failedVoters := vr.Filter(failed.FailedVoters)
	toRemove = a.adjudicateRemoval(failedVoters, vr)
	a.removeFailedServers(failed.GetFailed(toRemove, true))
	vr.RemoveAll(toRemove)

	return nil
}

func (a *Autopilot) adjudicateRemoval(ids []raft.ServerID, vr *VoterRegistry) []raft.ServerID {
	var result []raft.ServerID
	initialPotentialVoters := vr.PotentialVoters()
	removedPotentialVoters := 0
	maxRemoval := (initialPotentialVoters - 1) / 2
	minQuorum := a.delegate.AutopilotConfig().MinQuorum

	for _, id := range ids {
		v, found := vr.Eligibility[id]
		if !found {
	for _, id := range failed.StaleVoters {
		if voters-1 < int(conf.MinQuorum) {
			a.logger.Debug("will not remove server as it would leave less voters than the minimum number allowed", "id", id, "min", conf.MinQuorum)
		} else if maxRemoval < 1 {
			a.logger.Debug("will not remove server as removal of a majority of servers is not safe", "id", id)
		} else if err := a.removeServer(id); err != nil {
			return err
		} else {
			maxRemoval--
			voters--
		}
	}

	for _, srv := range failed.FailedVoters {
		if voters-1 < int(conf.MinQuorum) {
			a.logger.Debug("will not remove server as it would leave less voters than the minimum number allowed", "id", srv.ID, "min", conf.MinQuorum)
		} else if maxRemoval < 1 {
			a.logger.Debug("will not remove server as a removal of a majority of servers is not safe", "id", srv.ID)
		} else {
			a.logger.Info("Attempting removal of failed server node", "id", srv.ID, "name", srv.Name, "address", srv.Address)
			a.delegate.RemoveFailedServer(srv)
		if v != nil && v.IsPotentialVoter() && initialPotentialVoters-removedPotentialVoters-1 < int(minQuorum) {
			a.logger.Debug("will not remove server node as it would leave less voters than the minimum number allowed", "id", id, "min", minQuorum)
		} else if v.IsCurrentVoter() && maxRemoval < 1 {
			a.logger.Debug("will not remove server node as removal of a majority of voting servers is not safe", "id", id)
		} else if v != nil && v.IsCurrentVoter() {
			maxRemoval--
			voters--
			// We need to track how many voters we have removed from the registry
			// to ensure the total remaining potential voters is accurate
			removedPotentialVoters++
			result = append(result, id)
		} else {
			result = append(result, id)
		}
	}

	return result
}

func (a *Autopilot) removeStaleServer(id raft.ServerID) error {
	a.logger.Debug("removing server by ID", "id", id)
	future := a.raft.RemoveServer(id, 0, 0)
	if err := future.Error(); err != nil {
		a.logger.Error("failed to remove raft server", "id", id, "error", err)
		return err
	}
	a.logger.Info("removed server", "id", id)
	return nil
}

func (a *Autopilot) removeStaleServers(toRemove []raft.ServerID) error {
	var result error

	for _, id := range toRemove {
		err := a.removeStaleServer(id)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}

	return result
}

func (a *Autopilot) removeFailedServers(toRemove []*Server) {
	for _, srv := range toRemove {
		a.delegate.RemoveFailedServer(srv)
	}
}
Why are we changing the order here? E.g. before, FailedNonVoters were considered first; now we're looking at them after the stale servers.
I guess what I'm asking is: what's the basis for the "precedence" ordering you're using?
I believe (and may well be totally wrong) it should be that stale servers are the lowest 'value', as to the application those servers don't exist (in its view).
So removing stale servers (non-voting and then voting) when possible should be done before we attempt to remove failed servers (which the app does know about and which could potentially become healthy again).
A failed non-voter is still (most likely) a potential voter; a stale voter or non-voter won't be.
Might be one which requires more thought/conversation :D
I'm not sure that's true in Vault's case at least. A stale server could simply be one whose echo message we haven't seen yet. So it might be stale now, and in 10s it won't be.
Vault would be the 'application' in this scenario, and Vault's chance to say which servers should/shouldn't be part of the raft config is via KnownServers, which it supplies, naming both healthy and failed servers. So if the server isn't in that collection of known servers, but is in the Raft configuration, then it's stale (from autopilot's perspective) and needs to be removed.
I may have been in this code for too long on my own, but that was how it seemed to me to work.
So here in Vault's code a server is either healthy (alive) or failed (left) https://github.com/hashicorp/vault/blob/main/physical/raft/raft_autopilot.go#L428. If the server isn't in the map of servers, it's stale to Autopilot.
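For illustration only, a minimal sketch of the staleness rule described above, assuming it sits alongside getFailedServers in the same package (the helper name staleServerIDs is hypothetical): a server is stale when it appears in the Raft configuration but is absent from the delegate's KnownServers map.

```go
// staleServerIDs (sketch, not library code): returns the IDs of servers that
// are present in the Raft configuration but unknown to the application.
func staleServerIDs(raftServers []raft.Server, known map[raft.ServerID]*Server) []raft.ServerID {
	var stale []raft.ServerID
	for _, s := range raftServers {
		if _, ok := known[s.ID]; !ok {
			// In the Raft config but not reported by the application: stale.
			stale = append(stale, s.ID)
		}
	}
	return stale
}
```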
I went back and forth on this for a while. There are tradeoffs here. If we remove stale first then we are moving things closer to the application's desired cluster config. However, this does involve messing with the raft config, which is inherently a bit riskier than telling the application to remove a failed server. That said, at least in Consul's usage, the request to remove the failed server will almost always result in the server being removed from the raft config very quickly after.
For Consul's case the only time when one of those operations will not result in a raft config update is when the failed server isn't actually in the raft config yet. This seems like an extreme edge case and not something we need to explicitly account for.
So in the end I think we should follow two rules:
The code as implemented does follow those rules. It would be great if you could add a comment about the ordering somewhere in here though (which I should have done when originally writing the library)
I've added the rules you've mentioned in a comment in pruneDeadServers now, so hopefully the intent is documented to some extent.
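The exact wording of that comment isn't visible in this view; a sketch of the kind of ordering note being discussed, based only on the order the new pruneDeadServers code above uses:

```go
// Sketch of an ordering note (not the PR's actual comment text):
// Removal precedence in pruneDeadServers:
//   1. stale non-voters  - in the raft config but unknown to the application, not voting
//   2. stale voters      - unknown to the application, but still count towards quorum
//   3. failed non-voters - known to the application; removal is delegated via RemoveFailedServer
//   4. failed voters     - known to the application and voting, so removed last
// The voter registry is updated after each batch so the MinQuorum and
// majority-removal checks in adjudicateRemoval reflect the servers that remain.
```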