Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Hook operator add fast Successful Option #778

Merged
merged 1 commit into from
Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ rules:
- apiGroups: [""]
resources:
- pods
- pods/status
- secrets
- services
- configmaps
Expand Down Expand Up @@ -73,4 +74,4 @@ subjects:
- kind: ServiceAccount
name: gamestatefulset-operator
namespace: bcs-system
---
---
86 changes: 64 additions & 22 deletions bcs-k8s/bcs-hook-operator/pkg/controllers/hook/hook_reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ func (hc *HookController) reconcileHookRun(origRun *v1alpha1.HookRun) *v1alpha1.
klog.Warning(message)
run.Status.Phase = v1alpha1.HookPhaseError
run.Status.Message = message
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "hook completed %s", run.Status.Phase)
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "hook completed %s",
run.Status.Phase)
return run
}
}
Expand All @@ -75,13 +76,16 @@ func (hc *HookController) reconcileHookRun(origRun *v1alpha1.HookRun) *v1alpha1.

newStatus := hc.assessRunStatus(run)
if newStatus != run.Status.Phase {
message := fmt.Sprintf("HookRun: %s/%s, hook transitioned from %s -> %s", run.Namespace, run.Name, run.Status.Phase, newStatus)
message := fmt.Sprintf("HookRun: %s/%s, hook transitioned from %s -> %s", run.Namespace, run.Name,
run.Status.Phase, newStatus)
if newStatus.Completed() {
switch newStatus {
case v1alpha1.HookPhaseError, v1alpha1.HookPhaseFailed:
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "hook completed %s", newStatus)
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed,
"hook completed %s", newStatus)
default:
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted, "hook completed %s", newStatus)
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted,
"hook completed %s", newStatus)
}
}
klog.Info(message)
Expand Down Expand Up @@ -126,7 +130,8 @@ func generateMetricTasks(run *v1alpha1.HookRun) []metricTask {
continue
}
if terminating {
klog.Infof("HookRun: %s/%s, metric: %s. skipping measurement,run is terminating", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. skipping measurement,run is terminating",
run.Namespace, run.Name, metric.Name)
continue
}
if lastMeasurement == nil {
Expand All @@ -136,17 +141,20 @@ func generateMetricTasks(run *v1alpha1.HookRun) []metricTask {
}
duration, err := metric.InitialDelay.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse duration: %s", run.Namespace, run.Name, metric.Name, err.Error())
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse duration: %s",
run.Namespace, run.Name, metric.Name, err.Error())
continue
}
if run.Status.StartedAt.Add(duration).After(time.Now()) {
klog.Infof("HookRun: %s/%s, metric: %s. waiting until start delay duration passes", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. waiting until start delay duration passes",
run.Namespace, run.Name, metric.Name)
continue
}
}
// measurement never taken
tasks = append(tasks, metricTask{metric: metric})
klog.Infof("HookRun: %s/%s, metric: %s. running initial measurement", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. running initial measurement", run.Namespace, run.Name,
metric.Name)
continue
}
metricResult := hooksutil.GetResult(run, metric.Name)
Expand All @@ -162,14 +170,16 @@ func generateMetricTasks(run *v1alpha1.HookRun) []metricTask {
if metric.Interval != "" {
metricInterval, err := metric.Interval.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse internal: %s", run.Namespace, run.Name, metric.Name, err.Error())
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse internal: %s", run.Namespace,
run.Name, metric.Name, err.Error())
continue
}
interval = metricInterval
}
if time.Now().After(lastMeasurement.FinishedAt.Add(interval)) {
tasks = append(tasks, metricTask{metric: metric})
klog.Infof("HookRun: %s/%s, metric: %s. running overdue measurement", run.Namespace, run.Name, metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. running overdue measurement", run.Namespace, run.Name,
metric.Name)
continue
}
}
Expand Down Expand Up @@ -215,7 +225,8 @@ func (hc *HookController) runMeasurements(run *v1alpha1.HookRun, tasks []metricT
newMeasurement = provider.Run(run, t.metric)
} else {
if terminating {
klog.Infof("HookRun: %s/%s, metric: %s. terminating in-progress measurement", run.Namespace, run.Name, t.metric.Name)
klog.Infof("HookRun: %s/%s, metric: %s. terminating in-progress measurement",
run.Namespace, run.Name, t.metric.Name)
newMeasurement = provider.Terminate(run, t.metric, *t.incompleteMeasurement)
if newMeasurement.Phase == v1alpha1.HookPhaseSuccessful {
newMeasurement.Message = "metric terminated"
Expand All @@ -227,7 +238,8 @@ func (hc *HookController) runMeasurements(run *v1alpha1.HookRun, tasks []metricT
}

if newMeasurement.Phase.Completed() {
klog.Infof("HookRun: %s/%s, metric: %s. measurement completed %s", run.Namespace, run.Name, t.metric.Name, newMeasurement.Phase)
klog.Infof("HookRun: %s/%s, metric: %s. measurement completed %s", run.Namespace, run.Name,
t.metric.Name, newMeasurement.Phase)
if newMeasurement.FinishedAt == nil {
finishedAt := metav1.Now()
newMeasurement.FinishedAt = &finishedAt
Expand All @@ -237,17 +249,21 @@ func (hc *HookController) runMeasurements(run *v1alpha1.HookRun, tasks []metricT
metricResult.Successful++
metricResult.Count++
metricResult.ConsecutiveError = 0
metricResult.ConsecutiveSuccessful ++
case v1alpha1.HookPhaseFailed:
metricResult.Failed++
metricResult.Count++
metricResult.ConsecutiveError = 0
metricResult.ConsecutiveSuccessful = 0
case v1alpha1.HookPhaseInconclusive:
metricResult.Inconclusive++
metricResult.Count++
metricResult.ConsecutiveError = 0
metricResult.ConsecutiveSuccessful = 0
case v1alpha1.HookPhaseError:
metricResult.Error++
metricResult.ConsecutiveError++
metricResult.ConsecutiveSuccessful = 0
}
}
if t.incompleteMeasurement == nil {
Expand Down Expand Up @@ -286,9 +302,11 @@ func (hc *HookController) assessRunStatus(run *v1alpha1.HookRun) v1alpha1.HookPh
if metricStatus.Completed() {
switch metricStatus {
case v1alpha1.HookPhaseError, v1alpha1.HookPhaseFailed:
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed, "metric '%s' completed %s", metric.Name, metricStatus)
hc.recorder.Eventf(run, corev1.EventTypeWarning, EventReasonStatusFailed,
"metric '%s' completed %s", metric.Name, metricStatus)
default:
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted, "metric '%s' completed %s", metric.Name, metricStatus)
hc.recorder.Eventf(run, corev1.EventTypeNormal, EventReasonStatusCompleted,
"metric '%s' completed %s", metric.Name, metricStatus)
}
}
if lastMeasurement := hooksutil.LastMeasurement(run, metric.Name); lastMeasurement != nil {
Expand Down Expand Up @@ -337,28 +355,48 @@ func assessMetricStatus(metric v1alpha1.Metric, result v1alpha1.MetricResult, te
return v1alpha1.HookPhaseRunning
}
if result.Failed > metric.FailureLimit {
klog.Infof("metric %s assessed %s: failed (%d) > failureLimit (%d)", metric.Name, v1alpha1.HookPhaseFailed, result.Failed, metric.FailureLimit)
klog.Infof("metric %s assessed %s: failed (%d) > failureLimit (%d)", metric.Name,
v1alpha1.HookPhaseFailed, result.Failed, metric.FailureLimit)
return v1alpha1.HookPhaseFailed
}

if metric.SuccessfulLimit > 0 && result.Successful >= metric.SuccessfulLimit {
klog.Infof("metric %s assessed %s: successful (%d) > successfulLimit (%d)", metric.Name,
v1alpha1.HookPhaseSuccessful , result.Successful, metric.SuccessfulLimit)
return v1alpha1.HookPhaseSuccessful
}

if result.Inconclusive > metric.InconclusiveLimit {
klog.Infof("metric %s assessed %s: inconclusive (%d) > inconclusiveLimit (%d)", metric.Name, v1alpha1.HookPhaseInconclusive, result.Inconclusive, metric.InconclusiveLimit)
klog.Infof("metric %s assessed %s: inconclusive (%d) > inconclusiveLimit (%d)", metric.Name,
v1alpha1.HookPhaseInconclusive, result.Inconclusive, metric.InconclusiveLimit)
return v1alpha1.HookPhaseInconclusive
}
consecutiveErrorLimit := DefaultConsecutiveErrorLimit
if metric.ConsecutiveErrorLimit != nil {
consecutiveErrorLimit = *metric.ConsecutiveErrorLimit
}
if result.ConsecutiveError > consecutiveErrorLimit {
klog.Infof("metric %s assessed %s: consecutiveErrors (%d) > consecutiveErrorLimit (%d)", metric.Name, v1alpha1.HookPhaseError, result.ConsecutiveError, metric.ConsecutiveErrorLimit)
klog.Infof("metric %s assessed %s: consecutiveErrors (%d) > consecutiveErrorLimit (%d)",
metric.Name, v1alpha1.HookPhaseError, result.ConsecutiveError, consecutiveErrorLimit)
return v1alpha1.HookPhaseError
}

if metric.ConsecutiveSuccessfulLimit != nil {
if result.ConsecutiveSuccessful >= *metric.ConsecutiveSuccessfulLimit {
klog.Infof("metric %s assessed %s: consecutiveSuccessful (%d) >= consecutiveSuccessfulLimit (%d)",
metric.Name, v1alpha1.HookPhaseSuccessful, result.ConsecutiveSuccessful,
*metric.ConsecutiveSuccessfulLimit)
return v1alpha1.HookPhaseSuccessful
}
}

// If a count was specified, and we reached that count, then metric is considered Successful.
// The Error, Failed, Inconclusive counters are ignored because those checks have already been
// taken into consideration above, and we do not want to fail if failures < failureLimit.
effectiveCount := metric.EffectiveCount()
if effectiveCount != nil && result.Count >= *effectiveCount {
klog.Infof("metric %s assessed %s: count (%d) reached", metric.Name, v1alpha1.HookPhaseSuccessful, *effectiveCount)
klog.Infof("metric %s assessed %s: count (%d) reached", metric.Name, v1alpha1.HookPhaseSuccessful,
*effectiveCount)
return v1alpha1.HookPhaseSuccessful
}

Expand Down Expand Up @@ -422,7 +460,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
}
duration, err := metric.InitialDelay.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v", run.Namespace, run.Name, metric.Name, err)
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v",
run.Namespace, run.Name, metric.Name, err)
continue
}
endInitialDelay := startTime.Add(duration)
Expand All @@ -431,7 +470,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
}
continue
}
klog.Warningf("HookRun: %s/%s, metric: %s. metric never started. not factored into enqueue time", run.Namespace, run.Name, metric.Name)
klog.Warningf("HookRun: %s/%s, metric: %s. metric never started. " +
"not factored into enqueue time", run.Namespace, run.Name, metric.Name)
continue
}
if lastMeasurement.FinishedAt == nil {
Expand All @@ -451,7 +491,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
if metric.Interval != "" {
metricInterval, err := metric.Interval.Duration()
if err != nil {
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v", run.Namespace, run.Name, metric.Name, err)
klog.Warningf("HookRun: %s/%s, metric: %s. failed to parse interval: %v", run.Namespace,
run.Name, metric.Name, err)
continue
}
interval = metricInterval
Expand All @@ -462,7 +503,8 @@ func calculateNextReconcileTime(run *v1alpha1.HookRun) *time.Time {
// there was no error (meaning we don't need to retry). no need to requeue this metric.
// NOTE: we shouldn't ever get here since it means we are not doing proper bookkeeping
// of count.
klog.Warningf("HookRun: %s/%s, metric: %s. skipping requeue. no interval or error (count: %d, effectiveCount: %d)", run.Namespace, run.Name, metric.Name, metricResult.Count, metric.EffectiveCount())
klog.Warningf("HookRun: %s/%s, metric: %s. skipping requeue. no interval or error (count: %d, " +
"effectiveCount: %d)", run.Namespace, run.Name, metric.Name, metricResult.Count, metric.EffectiveCount())
continue
}

Expand Down
12 changes: 10 additions & 2 deletions bcs-k8s/bcs-hook-operator/pkg/util/hook/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ func ValidateMetrics(metrics []v1alpha1.Metric) error {
// ValidateMetric validates a single metric spec
func ValidateMetric(metric v1alpha1.Metric) error {
if metric.Count > 0 {
if metric.Count < metric.FailureLimit {
return fmt.Errorf("count must be >= failureLimit")
if (metric.Count < metric.FailureLimit) || (metric.Count < metric.SuccessfulLimit) {
return fmt.Errorf("count must be >= failureLimit && >= successfulLimit")
}
if metric.Count < metric.InconclusiveLimit {
return fmt.Errorf("count must be >= inconclusiveLimit")
Expand All @@ -67,12 +67,20 @@ func ValidateMetric(metric v1alpha1.Metric) error {
if metric.FailureLimit < 0 {
return fmt.Errorf("failureLimit must be >= 0")
}

if metric.SuccessfulLimit < 0 {
return fmt.Errorf("successLimit must be >= 0")
}

if metric.InconclusiveLimit < 0 {
return fmt.Errorf("inconclusiveLimit must be >= 0")
}
if metric.ConsecutiveErrorLimit != nil && *metric.ConsecutiveErrorLimit < 0 {
return fmt.Errorf("consecutiveErrorLimit must be >= 0")
}
if metric.ConsecutiveSuccessfulLimit != nil && *metric.ConsecutiveSuccessfulLimit < 1 {
return fmt.Errorf("consecutiveSuccessfulLimit must be >= 1")
}
numProviders := 0

if metric.Provider.Web != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,18 @@ type Metric struct {
// FailureLimit is the maximum number of times the measurement is allowed to fail, before the
// entire metric is considered Failed (default: 0)
FailureLimit int32 `json:"failureLimit,omitempty"`
// SuccessfulLimit is the number of successful measurements required before the
// entire metric is considered Successful; 0 disables this check (default: 0)
SuccessfulLimit int32 `json:"successfulLimit,omitempty"`
// InconclusiveLimit is the maximum number of times the measurement is allowed to measure
// Inconclusive, before the entire metric is considered Inconclusive (default: 0)
InconclusiveLimit int32 `json:"inconclusiveLimit,omitempty"`
// ConsecutiveErrorLimit is the maximum number of times the measurement is allowed to error in
// succession, before the metric is considered error (default: 4)
ConsecutiveErrorLimit *int32 `json:"consecutiveErrorLimit,omitempty"`
// ConsecutiveSuccessfulLimit is the minimum number of consecutive successful
// measurements required before the metric is considered Successful
ConsecutiveSuccessfulLimit *int32 `json:"consecutiveSuccessfulLimit,omitempty"`
// Provider configuration to the external system to use to verify the analysis
// +kubebuilder:validation:Required
Provider MetricProvider `json:"provider"`
Expand Down Expand Up @@ -203,6 +209,7 @@ type MetricResult struct {
Inconclusive int32 `json:"inconclusive,omitempty"`
Error int32 `json:"error,omitempty"`
ConsecutiveError int32 `json:"consecutiveError,omitempty"`
ConsecutiveSuccessful int32 `json:"consecutiveSuccessful,omitempty"`
}

type Measurement struct {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.