Skip to content

Commit

Permalink
Merge pull request #9009 from hashicorp/update-secondary-ca
Browse files Browse the repository at this point in the history
connect: Fix an issue with updating CA config in a secondary datacenter
  • Loading branch information
kyhavlov committed Dec 1, 2020
1 parent 5d71580 commit 6e62166
Show file tree
Hide file tree
Showing 17 changed files with 1,675 additions and 1,025 deletions.
25 changes: 1 addition & 24 deletions agent/connect/ca/provider_consul_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,7 @@ func (c *consulCAMockDelegate) State() *state.Store {
}

func (c *consulCAMockDelegate) ApplyCARequest(req *structs.CARequest) (interface{}, error) {
idx, _, err := c.state.CAConfig(nil)
if err != nil {
return nil, err
}

switch req.Op {
case structs.CAOpSetProviderState:
_, err := c.state.CASetProviderState(idx+1, req.ProviderState)
if err != nil {
return nil, err
}

return true, nil
case structs.CAOpDeleteProviderState:
if err := c.state.CADeleteProviderState(req.ProviderState.ID); err != nil {
return nil, err
}

return true, nil
case structs.CAOpIncrementProviderSerialNumber:
return uint64(2), nil
default:
return nil, fmt.Errorf("Invalid CA operation '%s'", req.Op)
}
return ApplyCARequestToStore(c.state, req)
}

func newMockDelegate(t *testing.T, conf *structs.CAConfiguration) *consulCAMockDelegate {
Expand Down
2 changes: 1 addition & 1 deletion agent/connect/ca/provider_vault.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func (v *VaultProvider) renewToken(ctx context.Context, watcher *vaultapi.Lifeti
go watcher.Start()

case <-watcher.RenewCh():
v.logger.Error("Successfully renewed token for Vault provider")
v.logger.Info("Successfully renewed token for Vault provider")
}
}
}
Expand Down
29 changes: 29 additions & 0 deletions agent/connect/ca/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"sync"

"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/freeport"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/go-hclog"
Expand Down Expand Up @@ -234,3 +236,30 @@ func (v *TestVaultServer) Stop() error {

return nil
}

func ApplyCARequestToStore(store *state.Store, req *structs.CARequest) (interface{}, error) {
idx, _, err := store.CAConfig(nil)
if err != nil {
return nil, err
}

switch req.Op {
case structs.CAOpSetProviderState:
_, err := store.CASetProviderState(idx+1, req.ProviderState)
if err != nil {
return nil, err
}

return true, nil
case structs.CAOpDeleteProviderState:
if err := store.CADeleteProviderState(req.ProviderState.ID); err != nil {
return nil, err
}

return true, nil
case structs.CAOpIncrementProviderSerialNumber:
return uint64(2), nil
default:
return nil, fmt.Errorf("Invalid CA operation '%s'", req.Op)
}
}
24 changes: 19 additions & 5 deletions agent/connect/testing_ca.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func ValidateLeaf(caPEM string, leafPEM string, intermediatePEMs []string) error
return err
}

func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *structs.CARoot {
func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int, ttl time.Duration) *structs.CARoot {
var result structs.CARoot
result.Active = true
result.Name = fmt.Sprintf("Test CA %d", atomic.AddUint64(&testCACounter, 1))
Expand All @@ -76,6 +76,14 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc
id := &SpiffeIDSigning{ClusterID: TestClusterID, Domain: "consul"}

// Create the CA cert
now := time.Now()
before := now
after := now
if ttl != 0 {
after = after.Add(ttl)
} else {
after = after.AddDate(10, 0, 0)
}
template := x509.Certificate{
SerialNumber: sn,
Subject: pkix.Name{CommonName: result.Name},
Expand All @@ -85,8 +93,8 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc
x509.KeyUsageCRLSign |
x509.KeyUsageDigitalSignature,
IsCA: true,
NotAfter: time.Now().AddDate(10, 0, 0),
NotBefore: time.Now(),
NotAfter: after,
NotBefore: before,
AuthorityKeyId: testKeyID(t, signer.Public()),
SubjectKeyId: testKeyID(t, signer.Public()),
}
Expand Down Expand Up @@ -159,13 +167,19 @@ func testCA(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *struc
// that is cross-signed with the previous cert, and this will be set as
// SigningCert.
func TestCA(t testing.T, xc *structs.CARoot) *structs.CARoot {
return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits)
return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits, 0)
}

// TestCAWithTTL is similar to TestCA, except that it
// takes a custom duration for the lifetime of the certificate.
func TestCAWithTTL(t testing.T, xc *structs.CARoot, ttl time.Duration) *structs.CARoot {
return testCA(t, xc, DefaultPrivateKeyType, DefaultPrivateKeyBits, ttl)
}

// TestCAWithKeyType is similar to TestCA, except that it
// takes two additional arguments to override the default private key type and size.
func TestCAWithKeyType(t testing.T, xc *structs.CARoot, keyType string, keyBits int) *structs.CARoot {
return testCA(t, xc, keyType, keyBits)
return testCA(t, xc, keyType, keyBits, 0)
}

// testCertID is an interface to be implemented the various spiffe ID / CertURI types
Expand Down
8 changes: 7 additions & 1 deletion agent/connect/testing_spiffe.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,16 @@ func TestSpiffeIDService(t testing.T, service string) *SpiffeIDService {
// TestSpiffeIDServiceWithHost returns a SPIFFE ID representing a service with
// the specified trust domain.
func TestSpiffeIDServiceWithHost(t testing.T, service, host string) *SpiffeIDService {
return TestSpiffeIDServiceWithHostDC(t, service, host, "dc1")
}

// TestSpiffeIDServiceWithHostDC returns a SPIFFE ID representing a service with
// the specified trust domain for the given datacenter.
func TestSpiffeIDServiceWithHostDC(t testing.T, service, host, datacenter string) *SpiffeIDService {
return &SpiffeIDService{
Host: host,
Namespace: "default",
Datacenter: "dc1",
Datacenter: datacenter,
Service: service,
}
}
215 changes: 2 additions & 213 deletions agent/consul/connect_ca_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@ package consul
import (
"errors"
"fmt"
"reflect"
"time"

"github.com/hashicorp/go-hclog"

"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/go-memdb"
Expand Down Expand Up @@ -103,216 +101,7 @@ func (s *ConnectCA) ConfigurationSet(
return acl.ErrPermissionDenied
}

// Exit early if it's a no-op change
state := s.srv.fsm.State()
confIdx, config, err := state.CAConfig(nil)
if err != nil {
return err
}

// Don't allow state changes. Either it needs to be empty or the same to allow
// read-modify-write loops that don't touch the State field.
if len(args.Config.State) > 0 &&
!reflect.DeepEqual(args.Config.State, config.State) {
return ErrStateReadOnly
}

// Don't allow users to change the ClusterID.
args.Config.ClusterID = config.ClusterID
if args.Config.Provider == config.Provider && reflect.DeepEqual(args.Config.Config, config.Config) {
return nil
}

// If the provider hasn't changed, we need to load the current Provider state
// so it can decide if it needs to change resources or not based on the config
// change.
if args.Config.Provider == config.Provider {
// Note this is a shallow copy since the State method doc requires the
// provider return a map that will not be further modified and should not
// modify the one we pass to Configure.
args.Config.State = config.State
}

// Create a new instance of the provider described by the config
// and get the current active root CA. This acts as a good validation
// of the config and makes sure the provider is functioning correctly
// before we commit any changes to Raft.
newProvider, err := s.srv.createCAProvider(args.Config)
if err != nil {
return fmt.Errorf("could not initialize provider: %v", err)
}
pCfg := ca.ProviderConfig{
ClusterID: args.Config.ClusterID,
Datacenter: s.srv.config.Datacenter,
// This endpoint can be called in a secondary DC too so set this correctly.
IsPrimary: s.srv.config.Datacenter == s.srv.config.PrimaryDatacenter,
RawConfig: args.Config.Config,
State: args.Config.State,
}
if err := newProvider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}

// Set up a defer to clean up the new provider if we exit early due to an error.
cleanupNewProvider := true
defer func() {
if cleanupNewProvider {
if err := newProvider.Cleanup(); err != nil {
s.logger.Warn("failed to clean up CA provider while handling startup failure", "provider", newProvider, "error", err)
}
}
}()

if err := newProvider.GenerateRoot(); err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}

newRootPEM, err := newProvider.ActiveRoot()
if err != nil {
return err
}

newActiveRoot, err := parseCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID)
if err != nil {
return err
}

// See if the provider needs to persist any state along with the config
pState, err := newProvider.State()
if err != nil {
return fmt.Errorf("error getting provider state: %v", err)
}
args.Config.State = pState

// Compare the new provider's root CA ID to the current one. If they
// match, just update the existing provider with the new config.
// If they don't match, begin the root rotation process.
_, root, err := state.CARootActive(nil)
if err != nil {
return err
}

// If the root didn't change or if this is a secondary DC, just update the
// config and return.
if (s.srv.config.Datacenter != s.srv.config.PrimaryDatacenter) ||
root != nil && root.ID == newActiveRoot.ID {
args.Op = structs.CAOpSetConfig
resp, err := s.srv.raftApply(structs.ConnectCARequestType, args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}

// If the config has been committed, update the local provider instance
cleanupNewProvider = false
s.srv.setCAProvider(newProvider, newActiveRoot)

s.logger.Info("CA provider config updated")

return nil
}

// At this point, we know the config change has trigged a root rotation,
// either by swapping the provider type or changing the provider's config
// to use a different root certificate.

// First up, sanity check that the current provider actually supports
// cross-signing.
oldProvider, _ := s.srv.getCAProvider()
if oldProvider == nil {
return fmt.Errorf("internal error: CA provider is nil")
}
canXSign, err := oldProvider.SupportsCrossSigning()
if err != nil {
return fmt.Errorf("CA provider error: %s", err)
}
if !canXSign && !args.Config.ForceWithoutCrossSigning {
return errors.New("The current CA Provider does not support cross-signing. " +
"You can try again with ForceWithoutCrossSigningSet but this may cause " +
"disruption - see documentation for more.")
}
if !canXSign && args.Config.ForceWithoutCrossSigning {
s.logger.Warn("current CA doesn't support cross signing but " +
"CA reconfiguration forced anyway with ForceWithoutCrossSigning")
}

// If it's a config change that would trigger a rotation (different provider/root):
// 1. Get the root from the new provider.
// 2. Call CrossSignCA on the old provider to sign the new root with the old one to
// get a cross-signed certificate.
// 3. Take the active root for the new provider and append the intermediate from step 2
// to its list of intermediates.
newRoot, err := connect.ParseCert(newRootPEM)
if err != nil {
return err
}

if canXSign {
// Have the old provider cross-sign the new root
xcCert, err := oldProvider.CrossSignCA(newRoot)
if err != nil {
return err
}

// Add the cross signed cert to the new CA's intermediates (to be attached
// to leaf certs).
newActiveRoot.IntermediateCerts = []string{xcCert}
}

intermediate, err := newProvider.GenerateIntermediate()
if err != nil {
return err
}
if intermediate != newRootPEM {
newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediate)
}

// Update the roots and CA config in the state store at the same time
idx, roots, err := state.CARoots(nil)
if err != nil {
return err
}

var newRoots structs.CARoots
for _, r := range roots {
newRoot := *r
if newRoot.Active {
newRoot.Active = false
newRoot.RotatedOutAt = time.Now()
}
newRoots = append(newRoots, &newRoot)
}
newRoots = append(newRoots, newActiveRoot)

args.Op = structs.CAOpSetRootsAndConfig
args.Index = idx
args.Config.ModifyIndex = confIdx
args.Roots = newRoots
resp, err := s.srv.raftApply(structs.ConnectCARequestType, args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
if respOk, ok := resp.(bool); ok && !respOk {
return fmt.Errorf("could not atomically update roots and config")
}

// If the config has been committed, update the local provider instance
// and call teardown on the old provider
cleanupNewProvider = false
s.srv.setCAProvider(newProvider, newActiveRoot)

if err := oldProvider.Cleanup(); err != nil {
s.logger.Warn("failed to clean up old provider", "provider", config.Provider)
}

s.logger.Info("CA rotated to new root under provider", "provider", args.Config.Provider)

return nil
return s.srv.caManager.UpdateConfiguration(args)
}

// Roots returns the currently trusted root certificates.
Expand Down Expand Up @@ -438,7 +227,7 @@ func (s *ConnectCA) SignIntermediate(
return acl.ErrPermissionDenied
}

provider, _ := s.srv.getCAProvider()
provider, _ := s.srv.caManager.getCAProvider()
if provider == nil {
return fmt.Errorf("internal error: CA provider is nil")
}
Expand Down
Loading

0 comments on commit 6e62166

Please sign in to comment.