Skip to content

Commit

Permalink
Fix unlocked mounts read (hashicorp#29091)
Browse files Browse the repository at this point in the history
This PR fixes copy-paste error in the product usage code where we were
taking out the authLock to access the mount table.

While we're add it we can remove the existing lock grabbing in the
product usage goroutine in favor of a serialized startup/teardown of
censusManager and its core dependency which requires the lock. This
requires some minor test edits, so created a test helper for that.

By moving the censusManager teardown before expirationManager teardown,
we can effectively ensure the goroutine is completely stopped outside of
any expirationManager change.

We are already guaranteed serial startup, so this should free us of any
complex lock semantics.
  • Loading branch information
mpalmi authored Dec 4, 2024
1 parent 73bf3eb commit a67e062
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 42 deletions.
3 changes: 3 additions & 0 deletions changelog/29091.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
core/metrics: Fix unlocked mounts read for usage reporting.
```
11 changes: 7 additions & 4 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -2898,14 +2898,17 @@ func (c *Core) preSeal() error {
if err := c.teardownAudits(); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down audits: %w", err))
}
if err := c.stopExpiration(); err != nil {
result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err))
}
// Ensure that the ActivityLog and CensusManager are both completely torn
// down before stopping the ExpirationManager. This ordering is critical,
// due to a tight coupling between the ActivityLog, CensusManager, and
// ExpirationManager for product usage reporting.
c.stopActivityLog()
// Clean up census on seal
if err := c.teardownCensusManager(); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down reporting agent: %w", err))
}
if err := c.stopExpiration(); err != nil {
result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err))
}
if err := c.teardownCredentials(context.Background()); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down credentials: %w", err))
}
Expand Down
26 changes: 6 additions & 20 deletions vault/core_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,22 +540,15 @@ func getMeanNamespaceSecrets(mapOfNamespacesToSecrets map[string]int) int {
func (c *Core) GetSecretEngineUsageMetrics() map[string]int {
mounts := make(map[string]int)

c.authLock.RLock()
defer c.authLock.RUnlock()

// we don't grab the statelock, so this code might run during or after the seal process.
// Therefore, we need to check if c.auth is nil. If we do not, this will panic when
// run after seal.
if c.auth == nil {
return mounts
}
c.mountsLock.RLock()
defer c.mountsLock.RUnlock()

for _, entry := range c.mounts.Entries {
authType := entry.Type
if _, ok := mounts[authType]; !ok {
mounts[authType] = 1
mountType := entry.Type
if _, ok := mounts[mountType]; !ok {
mounts[mountType] = 1
} else {
mounts[authType] += 1
mounts[mountType] += 1
}
}
return mounts
Expand All @@ -568,13 +561,6 @@ func (c *Core) GetAuthMethodUsageMetrics() map[string]int {
c.authLock.RLock()
defer c.authLock.RUnlock()

// we don't grab the statelock, so this code might run during or after the seal process.
// Therefore, we need to check if c.auth is nil. If we do not, this will panic when
// run after seal.
if c.auth == nil {
return mounts
}

for _, entry := range c.auth.Entries {
authType := entry.Type
if _, ok := mounts[authType]; !ok {
Expand Down
7 changes: 5 additions & 2 deletions vault/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,11 @@ func (c *Core) setupExpiration(e ExpireLeaseStrategy) error {
return nil
}

// stopExpiration is used to stop the expiration manager before
// sealing the Vault.
// stopExpiration is used to stop the expiration manager before sealing Vault.
// This *must* be called after shutting down the ActivityLog and
// CensusManager to prevent Core's expirationManager reference from
// changing while being accessed by product usage reporting. This is
// an unfortunate side-effect of tight coupling between ActivityLog and Core.
func (c *Core) stopExpiration() error {
if c.expiration != nil {
if err := c.expiration.Stop(); err != nil {
Expand Down
32 changes: 20 additions & 12 deletions vault/expiration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -855,10 +855,7 @@ func TestExpiration_Restore(t *testing.T) {
}

// Stop everything
err = c.stopExpiration()
if err != nil {
t.Fatalf("err: %v", err)
}
stopExpiration(t, c)

if exp.leaseCount != 0 {
t.Fatalf("expected %v leases, got %v", 0, exp.leaseCount)
Expand Down Expand Up @@ -3008,6 +3005,23 @@ func registerOneLease(t *testing.T, ctx context.Context, exp *ExpirationManager)
return leaseID
}

// stopExpiration is a test helper which allows us to safely teardown the
// expiration manager. This preserves the shutdown order of Core for these few
// outlier tests that (previously) directly called [Core].stopExpiration().
func stopExpiration(t *testing.T, core *Core) {
t.Helper()
core.stopActivityLog()
err := core.teardownCensusManager()
if err != nil {
t.Fatalf("error stopping census manager: %v", err)
}

err = core.stopExpiration()
if err != nil {
t.Fatalf("error stopping expiration manager: %v", err)
}
}

func TestExpiration_MarkIrrevocable(t *testing.T) {
c, _, _ := TestCoreUnsealed(t)
exp := c.expiration
Expand Down Expand Up @@ -3060,10 +3074,7 @@ func TestExpiration_MarkIrrevocable(t *testing.T) {
}

// stop and restore to verify that irrevocable leases are properly loaded from storage
err = c.stopExpiration()
if err != nil {
t.Fatalf("error stopping expiration manager: %v", err)
}
stopExpiration(t, c)

err = exp.Restore(nil)
if err != nil {
Expand Down Expand Up @@ -3153,10 +3164,7 @@ func TestExpiration_StopClearsIrrevocableCache(t *testing.T) {
exp.markLeaseIrrevocable(ctx, le, fmt.Errorf("test irrevocable error"))
exp.pendingLock.Unlock()

err = c.stopExpiration()
if err != nil {
t.Fatalf("error stopping expiration manager: %v", err)
}
stopExpiration(t, c)

if _, ok := exp.irrevocable.Load(leaseID); ok {
t.Error("expiration manager irrevocable cache should be cleared on stop")
Expand Down
5 changes: 1 addition & 4 deletions vault/token_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1170,10 +1170,7 @@ func TestTokenStore_CreateLookup_ExpirationInRestoreMode(t *testing.T) {
t.Fatalf("err: %v", err)
}

err = c.stopExpiration()
if err != nil {
t.Fatal(err)
}
stopExpiration(t, c)

// Reset expiration manager to restore mode
ts.expiration.restoreModeLock.Lock()
Expand Down

0 comments on commit a67e062

Please sign in to comment.