Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Hook operator add fast Successful Option #778

Merged
merged 1 commit into from
Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ rules:
- apiGroups: [""]
resources:
- pods
- pods/status
- secrets
- services
- configmaps
Expand Down Expand Up @@ -73,4 +74,4 @@ subjects:
- kind: ServiceAccount
name: gamestatefulset-operator
namespace: bcs-system
---
---
86 changes: 64 additions & 22 deletions bcs-k8s/bcs-hook-operator/pkg/controllers/hook/hook_reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ func (hc *HookController) reconcileHookRun(origRun *v1alpha1.HookRun) *v1alpha1.
klog.Warning(message)
run.Status.Phase = v1alpha1.HookPhaseError
run.Status.Message = message
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "hook completed %s", run.Status.Phase)
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "hook completed %s",
run.Status.Phase)
return run
}
}
Expand All @@ -75,13 +76,16 @@ func (hc *HookController) reconcileHookRun(origRun *v1alpha1.HookRun) *v1alpha1.

newStatus := hc.assessRunStatus(run)
if newStatus != run.Status.Phase {
message := fmt.Sprintf("HookRun: %s/%s, hook transitioned from %s -> %s", run.Namespace, run.Name, run.Status.Phase, newStatus)
message := fmt.Sprintf("HookRun: %s/%s, hook transitioned from %s -> %s", run.Namespace, run.Name,
run.Status.Phase, newStatus)
if newStatus.Completed() {
switch newStatus {
case v1alpha1.HookPhaseError, v1alpha1.HookPhaseFailed:
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "hook completed %s", newStatus)
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed,
"hook completed %s", newStatus)
default:
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted, "hook completed %s", newStatus)
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted,
"hook completed %s", newStatus)
}
}
klog.Info(message)
Expand Down Expand Up @@ -126,7 +130,8 @@ func generateMetricTasks(run *v1alpha1.HookRun) []metricTask {
continue
}
if terminating {
klog.Infof("HookRun: %s/%s, metric: %s. skipping measurement,run is terminating", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. skipping measurement,run is terminating",
run.Namespace, run.Name, metric.Name)
continue
}
if lastMeasurement == nil {
Expand All @@ -136,17 +141,20 @@ func generateMetricTasks(run *v1alpha1.HookRun) []metricTask {
}
duration, err := metric.InitialDelay.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse duration: %s", run.Namespace, run.Name, metric.Name, err.Error())
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse duration: %s",
run.Namespace, run.Name, metric.Name, err.Error())
continue
}
if run.Status.StartedAt.Add(duration).After(time.Now()) {
klog.Infof("HookRun: %s/%s, metric: %s. waiting until start delay duration passes", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. waiting until start delay duration passes",
run.Namespace, run.Name, metric.Name)
continue
}
}
// measurement never taken
tasks = append(tasks, metricTask{metric: metric})
klog.Infof("HookRun: %s/%s, metric: %s. running initial measurement", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. running initial measurement", run.Namespace, run.Name,
metric.Name)
continue
}
metricResult := hooksutil.GetResult(run, metric.Name)
Expand All @@ -162,14 +170,16 @@ func generateMetricTasks(run *v1alpha1.HookRun) []metricTask {
if metric.Interval != "" {
metricInterval, err := metric.Interval.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse internal: %s", run.Namespace, run.Name, metric.Name, err.Error())
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse internal: %s", run.Namespace,
run.Name, metric.Name, err.Error())
continue
}
interval = metricInterval
}
if time.Now().After(lastMeasurement.FinishedAt.Add(interval)) {
tasks = append(tasks, metricTask{metric: metric})
klog.Infof("HookRun: %s/%s, metric: %s. running overdue measurement", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. running overdue measurement", run.Namespace, run.Name,
metric.Name)
continue
}
}
Expand Down Expand Up @@ -215,7 +225,8 @@ func (hc *HookController) runMeasurements(run *v1alpha1.HookRun, tasks []metricT
newMeasurement = provider.Run(run, t.metric)
} else {
if terminating {
klog.Infof("HookRun: %s/%s, metric: %s. terminating in-progress measurement", run.Namespace, run.Name, t.metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. terminating in-progress measurement",
run.Namespace, run.Name, t.metric.Name)
newMeasurement = provider.Terminate(run, t.metric, *t.incompleteMeasurement)
if newMeasurement.Phase == v1alpha1.HookPhaseSuccessful {
newMeasurement.Message = "metric terminated"
Expand All @@ -227,7 +238,8 @@ func (hc *HookController) runMeasurements(run *v1alpha1.HookRun, tasks []metricT
}

if newMeasurement.Phase.Completed() {
klog.Infof("HookRun: %s/%s, metric: %s. measurement completed %s", run.Namespace, run.Name, t.metric.Name, newMeasurement.Phase)
klog.Infof("HookRun: %s/%s, metric: %s. measurement completed %s", run.Namespace, run.Name,
t.metric.Name, newMeasurement.Phase)
if newMeasurement.FinishedAt == nil {
finishedAt := metav1.Now()
newMeasurement.FinishedAt = &finishedAt
Expand All @@ -237,17 +249,21 @@ func (hc *HookController) runMeasurements(run *v1alpha1.HookRun, tasks []metricT
metricResult.Successful++
metricResult.Count++
metricResult.ConsecutiveError = 0
metricResult.ConsecutiveSuccessful ++
case v1alpha1.HookPhaseFailed:
metricResult.Failed++
metricResult.Count++
metricResult.ConsecutiveError = 0
metricResult.ConsecutiveSuccessful = 0
case v1alpha1.HookPhaseInconclusive:
metricResult.Inconclusive++
metricResult.Count++
metricResult.ConsecutiveError = 0
metricResult.ConsecutiveSuccessful = 0
case v1alpha1.HookPhaseError:
metricResult.Error++
metricResult.ConsecutiveError++
metricResult.ConsecutiveSuccessful = 0
}
}
if t.incompleteMeasurement == nil {
Expand Down Expand Up @@ -286,9 +302,11 @@ func (hc *HookController) assessRunStatus(run *v1alpha1.HookRun) v1alpha1.HookPh
if metricStatus.Completed() {
switch metricStatus {
case v1alpha1.HookPhaseError, v1alpha1.HookPhaseFailed:
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "metric '%s' completed %s", metric.Name, metricStatus)
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed,
"metric '%s' completed %s", metric.Name, metricStatus)
default:
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted, "metric '%s' completed %s", metric.Name, metricStatus)
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted,
"metric '%s' completed %s", metric.Name, metricStatus)
}
}
if lastMeasurement := hooksutil.LastMeasurement(run, metric.Name); lastMeasurement != nil {
Expand Down Expand Up @@ -337,28 +355,48 @@ func assessMetricStatus(metric v1alpha1.Metric, result v1alpha1.MetricResult, te
return v1alpha1.HookPhaseRunning
}
if result.Failed > metric.FailureLimit {
klog.Infof("metric %s assessed %s: failed (%d) > failureLimit (%d)", metric.Name, v1alpha1.HookPhaseFailed, result.Failed, metric.FailureLimit)
klog.Infof("metric %s assessed %s: failed (%d) > failureLimit (%d)", metric.Name,
v1alpha1.HookPhaseFailed, result.Failed, metric.FailureLimit)
return v1alpha1.HookPhaseFailed
}

if metric.SuccessfulLimit > 0 && result.Successful >= metric.SuccessfulLimit {
klog.Infof("metric %s assessed %s: successful (%d) > successfulLimit (%d)", metric.Name,
v1alpha1.HookPhaseSuccessful , result.Successful, metric.SuccessfulLimit)
return v1alpha1.HookPhaseSuccessful
}

if result.Inconclusive > metric.InconclusiveLimit {
klog.Infof("metric %s assessed %s: inconclusive (%d) > inconclusiveLimit (%d)", metric.Name, v1alpha1.HookPhaseInconclusive, result.Inconclusive, metric.InconclusiveLimit)
klog.Infof("metric %s assessed %s: inconclusive (%d) > inconclusiveLimit (%d)", metric.Name,
v1alpha1.HookPhaseInconclusive, result.Inconclusive, metric.InconclusiveLimit)
return v1alpha1.HookPhaseInconclusive
}
consecutiveErrorLimit := DefaultConsecutiveErrorLimit
if metric.ConsecutiveErrorLimit != nil {
consecutiveErrorLimit = *metric.ConsecutiveErrorLimit
}
if result.ConsecutiveError > consecutiveErrorLimit {
klog.Infof("metric %s assessed %s: consecutiveErrors (%d) > consecutiveErrorLimit (%d)", metric.Name, v1alpha1.HookPhaseError, result.ConsecutiveError, metric.ConsecutiveErrorLimit)
klog.Infof("metric %s assessed %s: consecutiveErrors (%d) > consecutiveErrorLimit (%d)",
metric.Name, v1alpha1.HookPhaseError, result.ConsecutiveError, consecutiveErrorLimit)
return v1alpha1.HookPhaseError
}

if metric.ConsecutiveSuccessfulLimit != nil {
if result.ConsecutiveSuccessful >= *metric.ConsecutiveSuccessfulLimit {
klog.Infof("metric %s assessed %s: consecutiveSuccessful (%d) >= consecutiveSuccessfulLimit (%d)",
metric.Name, v1alpha1.HookPhaseSuccessful, result.ConsecutiveSuccessful,
*metric.ConsecutiveSuccessfulLimit)
return v1alpha1.HookPhaseSuccessful
}
}

// If a count was specified, and we reached that count, then metric is considered Successful.
// The Error, Failed, Inconclusive counters are ignored because those checks have already been
// taken into consideration above, and we do not want to fail if failures < failureLimit.
effectiveCount := metric.EffectiveCount()
if effectiveCount != nil && result.Count >= *effectiveCount {
klog.Infof("metric %s assessed %s: count (%d) reached", metric.Name, v1alpha1.HookPhaseSuccessful, *effectiveCount)
klog.Infof("metric %s assessed %s: count (%d) reached", metric.Name, v1alpha1.HookPhaseSuccessful,
*effectiveCount)
return v1alpha1.HookPhaseSuccessful
}

Expand Down Expand Up @@ -422,7 +460,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
}
duration, err := metric.InitialDelay.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v", run.Namespace, run.Name, metric.Name, err)
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v",
run.Namespace, run.Name, metric.Name, err)
continue
}
endInitialDelay := startTime.Add(duration)
Expand All @@ -431,7 +470,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
}
continue
}
klog.Warningf("HookRun: %s/%s, metric: %s. metric never started. not factored into enqueue time", run.Namespace, run.Name, metric.Name)
klog.Warningf("HookRun: %s/%s, metric: %s. metric never started. " +
"not factored into enqueue time", run.Namespace, run.Name, metric.Name)
continue
}
if lastMeasurement.FinishedAt == nil {
Expand All @@ -451,7 +491,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
if metric.Interval != "" {
metricInterval, err := metric.Interval.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v", run.Namespace, run.Name, metric.Name, err)
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v", run.Namespace,
run.Name, metric.Name, err)
continue
}
interval = metricInterval
Expand All @@ -462,7 +503,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
// there was no error (meaning we don't need to retry). no need to requeue this metric.
// NOTE: we shouldn't ever get here since it means we are not doing proper bookkeeping
// of count.
klog.Warningf("HookRun: %s/%s, metric: %s. skipping requeue. no interval or error (count: %d, effectiveCount: %d)", run.Namespace, run.Name, metric.Name, metricResult.Count, metric.EffectiveCount())
klog.Warningf("HookRun: %s/%s, metric: %s. skipping requeue. no interval or error (count: %d, " +
"effectiveCount: %d)", run.Namespace, run.Name, metric.Name, metricResult.Count, metric.EffectiveCount())
continue
}

Expand Down
12 changes: 10 additions & 2 deletions bcs-k8s/bcs-hook-operator/pkg/util/hook/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ func ValidateMetrics(metrics []v1alpha1.Metric) error {
// ValidateMetric validates a single metric spec
func ValidateMetric(metric v1alpha1.Metric) error {
if metric.Count > 0 {
if metric.Count < metric.FailureLimit {
return fmt.Errorf("count must be >= failureLimit")
if (metric.Count < metric.FailureLimit) || (metric.Count < metric.SuccessfulLimit) {
return fmt.Errorf("count must be >= failureLimit && >= successfulLimit")
}
if metric.Count < metric.InconclusiveLimit {
return fmt.Errorf("count must be >= inconclusiveLimit")
Expand All @@ -67,12 +67,20 @@ func ValidateMetric(metric v1alpha1.Metric) error {
if metric.FailureLimit < 0 {
return fmt.Errorf("failureLimit must be >= 0")
}

if metric.SuccessfulLimit < 0 {
return fmt.Errorf("successLimit must be >= 0")
}

if metric.InconclusiveLimit < 0 {
return fmt.Errorf("inconclusiveLimit must be >= 0")
}
if metric.ConsecutiveErrorLimit != nil && *metric.ConsecutiveErrorLimit < 0 {
return fmt.Errorf("consecutiveErrorLimit must be >= 0")
}
if metric.ConsecutiveSuccessfulLimit != nil && *metric.ConsecutiveSuccessfulLimit < 1 {
return fmt.Errorf("consecutiveSuccessfulLimit must be >= 1")
}
numProviders := 0

if metric.Provider.Web != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,18 @@ type Metric struct {
// FailureLimit is the maximum number of times the measurement is allowed to fail, before the
// entire metric is considered Failed (default: 0)
FailureLimit int32 `json:"failureLimit,omitempty"`
// SuccessfulLimit is the number of successful measurements required before the
// entire metric is considered Successful; 0 disables this check (default: 0)
SuccessfulLimit int32 `json:"successfulLimit,omitempty"`
// InconclusiveLimit is the maximum number of times the measurement is allowed to measure
// Inconclusive, before the entire metric is considered Inconclusive (default: 0)
InconclusiveLimit int32 `json:"inconclusiveLimit,omitempty"`
// ConsecutiveErrorLimit is the maximum number of times the measurement is allowed to error in
// succession, before the metric is considered error (default: 4)
ConsecutiveErrorLimit *int32 `json:"consecutiveErrorLimit,omitempty"`
// ConsecutiveSuccessfulLimit is the minimum number of consecutive successful
// measurements required before the metric is considered Successful
ConsecutiveSuccessfulLimit *int32 `json:"consecutiveSuccessfulLimit,omitempty"`
// Provider configuration to the external system to use to verify the analysis
// +kubebuilder:validation:Required
Provider MetricProvider `json:"provider"`
Expand Down Expand Up @@ -203,6 +209,7 @@ type MetricResult struct {
Inconclusive int32 `json:"inconclusive,omitempty"`
Error int32 `json:"error,omitempty"`
ConsecutiveError int32 `json:"consecutiveError,omitempty"`
ConsecutiveSuccessful int32 `json:"consecutiveSuccessful,omitempty"`
}

type Measurement struct {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.