Skip to content

Commit

Permalink
Add Reindex In Progress Metric (#23160)
Browse files Browse the repository at this point in the history
* Add a telemetry metric to track if a reindex is in progress or not

* changelog

* Add other reindex related metrics

* cleanup types

* Add docs for these metrics

* check for nil values
  • Loading branch information
ltcarbonell authored Sep 22, 2023
1 parent 68dd82c commit c93137d
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 0 deletions.
3 changes: 3 additions & 0 deletions changelog/23160.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
replication: Add re-index status metric to telemetry
```
23 changes: 23 additions & 0 deletions vault/core_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,29 @@ func (c *Core) metricsLoop(stopCh chan struct{}) {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "dr", "secondary"}, 0, nil)
}

if haState == consts.Active {
reindexState := c.ReindexStage()
if reindexState != nil {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "reindex_stage"}, float32(*reindexState), nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "reindex_stage"}, 0, nil)
}

buildProgress := c.BuildProgress()
if buildProgress != nil {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_progress"}, float32(*buildProgress), nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_progress"}, 0, nil)
}

buildTotal := c.BuildTotal()
if buildTotal != nil {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_total"}, float32(*buildTotal), nil)
} else {
c.metricSink.SetGaugeWithLabels([]string{"core", "replication", "build_total"}, 0, nil)
}
}

// If we're using a raft backend, emit raft metrics
if rb, ok := c.underlyingPhysical.(*raft.RaftBackend); ok {
rb.CollectMetrics(c.MetricSink())
Expand Down
4 changes: 4 additions & 0 deletions vault/core_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ func (c *Core) UndoLogsEnabled() bool { return false }
func (c *Core) UndoLogsPersisted() (bool, error) { return false, nil }
func (c *Core) PersistUndoLogs() error { return nil }

// ReindexStage returns a pointer to the current reindex stage, or nil when
// no stage is available. This implementation is a stub that always returns
// nil; the metrics loop reports a nil result as a gauge value of 0 for
// core.replication.reindex_stage.
func (c *Core) ReindexStage() *uint32 {
	return nil
}
// BuildProgress returns a pointer to the build progress counter, or nil
// when no value is available. This implementation is a stub that always
// returns nil; the metrics loop reports a nil result as a gauge value of 0
// for core.replication.build_progress.
func (c *Core) BuildProgress() *uint32 {
	return nil
}
// BuildTotal returns a pointer to the build total counter, or nil when no
// value is available. This implementation is a stub that always returns
// nil; the metrics loop reports a nil result as a gauge value of 0 for
// core.replication.build_total.
func (c *Core) BuildTotal() *uint32 {
	return nil
}

func (c *Core) teardownReplicationResolverHandler() {}
func createSecondaries(*Core, *CoreConfig) {}

Expand Down
6 changes: 6 additions & 0 deletions website/content/docs/internals/telemetry/metrics/all.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ alphabetic order by name.

@include 'telemetry-metrics/vault/core/replication/write_undo_logs.mdx'

@include 'telemetry-metrics/vault/core/replication/build_progress.mdx'

@include 'telemetry-metrics/vault/core/replication/build_total.mdx'

@include 'telemetry-metrics/vault/core/replication/reindex_stage.mdx'

@include 'telemetry-metrics/vault/core/seal_internal.mdx'

@include 'telemetry-metrics/vault/core/seal_with_request.mdx'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### vault.core.replication.build_progress ((#vault-core-replication-build_progress))

Metric type | Value | Description
----------- | ------- | -----------
gauge | keys | Number of keys that have been inserted into the new merkle tree during a reindex
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### vault.core.replication.build_total ((#vault-core-replication-build_total))

Metric type | Value | Description
----------- | ------- | -----------
gauge | keys | Total number of keys that have to be inserted into the new merkle tree during a reindex
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
### vault.core.replication.reindex_stage ((#vault-core-replication-reindex_stage))

Metric type | Value | Description
----------- | ------- | -----------
gauge | stage | Current stage of the reindexing process

- A value of `4` indicates the reindex process is committing any differences between the newly created tree and the old tree.
- A value of `3` indicates the reindex process is replaying WALs to ensure no updates were missed while scanning and building.
- A value of `2` indicates the reindex process is currently building a new merkle tree based on the values for the keys obtained in the scanning stage.
- A value of `1` indicates the reindex process is currently creating a list of all known storage keys.
- A value of `0` indicates that a reindex is not in progress.

0 comments on commit c93137d

Please sign in to comment.