Skip to content

Commit

Permalink
Mitigate against token replication lag (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Glass authored May 2, 2022
1 parent a2d97cb commit 4627171
Showing 1 changed file with 30 additions and 2 deletions.
32 changes: 30 additions & 2 deletions modules/mesh-task/templates/consul_client_command.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ echo "$CONSUL_CACERT" > /consul/consul-ca-cert.pem

%{ if acls && client_token_auth_method_name != "" ~}

login() {
consul_login() {
echo "Logging into auth method: name=${ client_token_auth_method_name }"
consul login \
-http-addr ${ consul_http_addr } \
Expand All @@ -26,10 +26,38 @@ login() {
-token-sink-file /consul/client-token
}

while ! login; do
# Reads back the token stored in /consul/client-token ("-self") using the
# stale consistency mode, to check whether the token is visible yet.
# All output of the command is discarded; only the exit status matters.
# Returns: the exit status of `consul acl token read` (0 when the token
#          could be read).
read_token_stale() {
    consul acl token read -http-addr ${ consul_http_addr } \
%{ if tls ~}
    -ca-file /consul/consul-ca-cert.pem \
%{ endif ~}
    -stale -self -token-file /consul/client-token \
    > /dev/null 2>&1
    # NOTE: the redirect is spelled "> /dev/null 2>&1" on purpose. The bash
    # shorthand (ampersand + greater-than) is not POSIX; a strict sh parses it
    # as "run in background, then redirect", which would make this function
    # return 0 unconditionally.
}

# Keep attempting the auth-method login until it succeeds, pausing 2s
# between attempts.
until consul_login; do
    sleep 2
done

# Give raft replication a chance to make the new token visible. Otherwise an
# "ACL not found" result may be cached for a while. This is a mitigation, not
# a guarantee: the problem can still occur, but most cases are covered.
# Sleeps for at most 2s in total (20 attempts x 0.1s), not counting the time
# each read attempt itself takes.
COUNT=20
until [ "$COUNT" -le 0 ]; do
    echo "Checking that the ACL token exists when reading it in the stale consistency mode ($COUNT attempts remaining)"
    if ! read_token_stale; then
        # Not visible yet: back off briefly and burn one attempt.
        sleep 0.1
        COUNT=$((COUNT - 1))
        continue
    fi
    echo "Successfully read ACL token from the server"
    break
done

# COUNT only reaches 0 when every attempt above failed (a successful read
# breaks out of the loop before the counter is decremented).
if [ "$COUNT" -eq 0 ]; then
    echo "Unable to read ACL token from a Consul server; please check that your server cluster is healthy"
    exit 1
fi

# This is an env var which is interpolated into the agent-defaults.hcl
# The assignment is kept separate from `export` so that a failure to read the
# token file is detected instead of being masked by export's own exit status
# (which would silently export an empty token).
if ! AGENT_TOKEN=$(cat /consul/client-token); then
    echo "Unable to read the client token from /consul/client-token"
    exit 1
fi
export AGENT_TOKEN
%{ endif ~}
Expand Down

0 comments on commit 4627171

Please sign in to comment.