Skip to content

Commit

Permalink
*: reset metrics after the leader steps down (#1790) (#2185)
Browse files Browse the repository at this point in the history
  • Loading branch information
rleungx authored Mar 9, 2020
1 parent cbc6f0b commit 3208af2
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ default.pd
tags
/.retools/
default.pd*
.vscode/
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static:
gofmt -s -l $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER)
./hack/retool do govet --shadow $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER)

CGO_ENABLED=0 ./hack/retool do gometalinter.v2 --disable-all --deadline 120s \
CGO_ENABLED=0 ./hack/retool do gometalinter.v2 --disable-all --deadline 240s \
--enable misspell \
--enable staticcheck \
--enable ineffassign \
Expand Down
12 changes: 12 additions & 0 deletions server/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,16 @@ func (c *RaftCluster) collectMetrics() {
c.collectHealthStatus()
}

// resetMetrics resets the metrics owned by this cluster: the store statistics
// map, the coordinator's scheduler and hot-spot metrics, and finally the
// cached cluster's own metrics.
func (c *RaftCluster) resetMetrics() {
	cached := c.cachedCluster

	// A fresh statistics map is built only to reach its Reset method.
	storeStats := newStoreStatisticsMap(cached.opt, c.GetNamespaceClassifier())
	storeStats.Reset()

	c.coordinator.resetSchedulerMetrics()
	c.coordinator.resetHotSpotMetrics()

	cached.resetMetrics()
}

func (c *RaftCluster) collectHealthStatus() {
client := c.s.GetClient()
members, err := GetMembers(client)
Expand All @@ -593,6 +603,8 @@ func (c *RaftCluster) runBackgroundJobs(interval time.Duration) {
for {
select {
case <-c.quit:
log.Info("metrics are reset")
c.resetMetrics()
log.Info("background jobs has been stopped")
return
case <-ticker.C:
Expand Down
16 changes: 14 additions & 2 deletions server/cluster_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,17 +636,29 @@ func (c *clusterInfo) updateRegionsLabelLevelStats(regions []*core.RegionInfo) {
}

// collectMetrics publishes the region statistics, the label-level statistics
// and the hot cache statistics. It does nothing when regionStats is nil.
//
// The read lock is taken exactly once, before regionStats is inspected, so the
// nil check and the collection happen under the same lock. Acquiring it a
// second time in the same call would be a recursive read lock, which can
// deadlock if a writer queues up between the two acquisitions.
// NOTE(review): assumes c embeds a sync.RWMutex — confirm against the type.
func (c *clusterInfo) collectMetrics() {
	c.RLock()
	defer c.RUnlock()
	if c.regionStats == nil {
		return
	}
	c.regionStats.Collect()
	c.labelLevelStats.Collect()
	// collect hot cache metrics
	c.core.HotCache.CollectMetrics(c.core.Stores)
}

// resetMetrics resets the region statistics, the label-level statistics and
// the hot cache metrics while holding the read lock. It is a no-op when
// regionStats is nil.
func (c *clusterInfo) resetMetrics() {
	c.RLock()
	defer c.RUnlock()

	if c.regionStats != nil {
		c.regionStats.Reset()
		c.labelLevelStats.Reset()
		// reset hot cache metrics
		c.core.HotCache.ResetMetrics()
	}
}

func (c *clusterInfo) GetRegionStatsByType(typ regionStatisticType) []*core.RegionInfo {
if c.regionStats == nil {
return nil
Expand Down
7 changes: 7 additions & 0 deletions server/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ func (c *coordinator) collectSchedulerMetrics() {
}
}

// resetSchedulerMetrics resets schedulerStatusGauge. It does not read any
// coordinator state and takes no lock.
func (c *coordinator) resetSchedulerMetrics() {
schedulerStatusGauge.Reset()
}

func (c *coordinator) collectHotSpotMetrics() {
c.RLock()
defer c.RUnlock()
Expand Down Expand Up @@ -412,7 +416,10 @@ func (c *coordinator) collectHotSpotMetrics() {
hotSpotStatusGauge.WithLabelValues(store, "hot_read_region_as_leader").Set(0)
}
}
}

// resetHotSpotMetrics resets hotSpotStatusGauge. It does not read any
// coordinator state and takes no lock.
func (c *coordinator) resetHotSpotMetrics() {
hotSpotStatusGauge.Reset()
}

func (c *coordinator) shouldRun() bool {
Expand Down
3 changes: 3 additions & 0 deletions server/coordinator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ func (s *testCoordinatorSuite) TestCollectMetrics(c *C) {
co.collectSchedulerMetrics()
co.cluster.collectMetrics()
}
co.resetHotSpotMetrics()
co.resetSchedulerMetrics()
co.cluster.resetMetrics()
}

func (s *testCoordinatorSuite) TestCheckRegion(c *C) {
Expand Down
8 changes: 8 additions & 0 deletions server/region_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ func (r *regionStatistics) Collect() {
regionStatusGauge.WithLabelValues("learner_peer_region_count").Set(float64(len(r.stats[learnerPeer])))
}

// Reset resets regionStatusGauge. The statistics held in r are left untouched;
// only the gauge is reset.
func (r *regionStatistics) Reset() {
regionStatusGauge.Reset()
}

type labelLevelStatistics struct {
regionLabelLevelStats map[uint64]int
labelLevelCounter map[int]int
Expand Down Expand Up @@ -175,6 +179,10 @@ func (l *labelLevelStatistics) Collect() {
}
}

// Reset resets regionLabelLevelGauge. The counters held in l are left
// untouched; only the gauge is reset.
func (l *labelLevelStatistics) Reset() {
regionLabelLevelGauge.Reset()
}

func (l *labelLevelStatistics) clearDefunctRegion(regionID uint64) {
if level, ok := l.regionLabelLevelStats[regionID]; ok {
l.labelLevelCounter[level]--
Expand Down
5 changes: 5 additions & 0 deletions server/schedule/hot_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ func (w *HotSpotCache) CollectMetrics(stores *core.StoresInfo) {
hotCacheStatusGauge.WithLabelValues("hotThreshold", "read").Set(float64(threshold))
}

// ResetMetrics resets the hot cache metrics by resetting hotCacheStatusGauge.
// The cached flow statistics in w are not modified.
func (w *HotSpotCache) ResetMetrics() {
hotCacheStatusGauge.Reset()
}

func (w *HotSpotCache) isRegionHot(id uint64, hotThreshold int) bool {
if stat, ok := w.writeFlow.Peek(id); ok {
if stat.(*core.RegionStat).HotDegree >= hotThreshold {
Expand Down
5 changes: 5 additions & 0 deletions server/store_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,8 @@ func (m *storeStatisticsMap) Collect() {
s.Collect()
}
}

// Reset resets storeStatusGauge and clusterStatusGauge. It does not read or
// modify the per-store statistics held in m.
func (m *storeStatisticsMap) Reset() {
storeStatusGauge.Reset()
clusterStatusGauge.Reset()
}

0 comments on commit 3208af2

Please sign in to comment.