From 4516e164d669aa4cccfa3fd75e57d9c95c8c3312 Mon Sep 17 00:00:00 2001 From: Prashanth Date: Thu, 16 Aug 2018 17:45:03 +0530 Subject: [PATCH 1/2] Removed any unwanted re-enqueuing of machine objects - There were several places where machine objects were re-enqueued unwantedly - This lead to several reconcilation loops and objects being updated frequently - This commit removes any such code --- pkg/controller/deployment_progress.go | 3 +- pkg/controller/deployment_util.go | 19 ++++++++++ pkg/controller/machine.go | 51 +++++++++++++++------------ pkg/controller/machine_util.go | 16 +++++++++ pkg/controller/machineset.go | 2 +- 5 files changed, 65 insertions(+), 26 deletions(-) diff --git a/pkg/controller/deployment_progress.go b/pkg/controller/deployment_progress.go index 91c6d0116..80369ebf0 100644 --- a/pkg/controller/deployment_progress.go +++ b/pkg/controller/deployment_progress.go @@ -24,7 +24,6 @@ package controller import ( "fmt" - "reflect" "time" "github.com/golang/glog" @@ -108,7 +107,7 @@ func (dc *controller) syncRolloutStatus(allISs []*v1alpha1.MachineSet, newIS *v1 } // Do not update if there is nothing new to add. - if reflect.DeepEqual(d.Status, newStatus) { + if !statusUpdateRequired(d.Status, newStatus) { // Requeue the deployment if required. dc.requeueStuckMachineDeployment(d, newStatus) return nil diff --git a/pkg/controller/deployment_util.go b/pkg/controller/deployment_util.go index 0f3a389b9..4b4e6950b 100644 --- a/pkg/controller/deployment_util.go +++ b/pkg/controller/deployment_util.go @@ -24,6 +24,7 @@ package controller import ( "fmt" + "reflect" "sort" "strconv" "strings" @@ -1017,3 +1018,21 @@ func ResolveFenceposts(maxSurge, maxUnavailable *intstrutil.IntOrString, desired return int32(surge), int32(unavailable), nil } + +// statusUpdateRequired checks for if status update is required comparing two MachineDeployment statuses +func statusUpdateRequired(old v1alpha1.MachineDeploymentStatus, new v1alpha1.MachineDeploymentStatus) bool { + + if old.AvailableReplicas == new.AvailableReplicas && + old.CollisionCount == new.CollisionCount && + len(old.FailedMachines) == len(new.FailedMachines) && + old.ObservedGeneration == new.ObservedGeneration && + old.ReadyReplicas == new.ReadyReplicas && + old.Replicas == new.Replicas && + old.UpdatedReplicas == new.UpdatedReplicas && + reflect.DeepEqual(old.Conditions, new.Conditions) { + // If all conditions are matching + return false + } + + return true +} diff --git a/pkg/controller/machine.go b/pkg/controller/machine.go index 60c09b529..4143ef7b1 100644 --- a/pkg/controller/machine.go +++ b/pkg/controller/machine.go @@ -58,7 +58,7 @@ func (c *controller) addMachine(obj interface{}) { glog.Errorf("Couldn't get key for object %+v: %v", obj, err) return } - glog.V(4).Info("Adding machine object") + glog.V(4).Infof("Add/Update/Delete machine object %q", key) c.machineQueue.Add(key) } @@ -184,7 +184,7 @@ func (c *controller) reconcileClusterMachine(machine *v1alpha1.Machine) error { } } - c.enqueueMachineAfter(machine, time.Minute*10) + c.enqueueMachineAfter(machine, 10*time.Minute) return nil } @@ -207,6 +207,7 @@ func (c *controller) addNodeToMachine(obj interface{}) { return } + glog.V(4).Infof("Add machine object backing node %q", machine.Name) c.enqueueMachine(machine) } @@ -545,8 +546,9 @@ func (c *controller) updateMachineStatus( func (c *controller) updateMachineConditions(machine *v1alpha1.Machine, conditions []v1.NodeCondition) (*v1alpha1.Machine, error) { var ( - msg string - lastOperationType v1alpha1.MachineOperationType + msg string + lastOperationType v1alpha1.MachineOperationType + objectRequiresUpdate bool ) // Get the latest version of the machine so that we can avoid conflicts @@ -556,12 +558,14 @@ func (c *controller) updateMachineConditions(machine *v1alpha1.Machine, conditio } clone := machine.DeepCopy() - clone.Status.Conditions = conditions - //glog.Info(c.isHealthy(clone)) + if nodeConditionsHaveChanged(clone.Status.Conditions, conditions) { + clone.Status.Conditions = conditions + objectRequiresUpdate = true + } if clone.Status.CurrentStatus.Phase == v1alpha1.MachineTerminating { - // If machine is already in terminating state, don't update + // If machine is already in terminating state, don't update health status } else if !c.isHealthy(clone) && clone.Status.CurrentStatus.Phase == v1alpha1.MachineRunning { // If machine is not healthy, and current state is running, @@ -580,6 +584,7 @@ func (c *controller) updateMachineConditions(machine *v1alpha1.Machine, conditio Type: v1alpha1.MachineOperationHealthCheck, LastUpdateTime: metav1.Now(), } + objectRequiresUpdate = true } else if c.isHealthy(clone) && clone.Status.CurrentStatus.Phase != v1alpha1.MachineRunning { // If machine is healhy and current machinePhase is not running. @@ -609,16 +614,22 @@ func (c *controller) updateMachineConditions(machine *v1alpha1.Machine, conditio TimeoutActive: false, LastUpdateTime: metav1.Now(), } + objectRequiresUpdate = true + } - clone, err = c.controlMachineClient.Machines(clone.Namespace).Update(clone) - if err != nil { - // Keep retrying until update goes through - glog.V(2).Infof("Warning: Updated failed, retrying, error: %q", err) - return c.updateMachineConditions(machine, conditions) + if objectRequiresUpdate { + clone, err = c.controlMachineClient.Machines(clone.Namespace).Update(clone) + if err != nil { + // Keep retrying until update goes through + glog.Warningf("Updated failed, retrying, error: %q", err) + return c.updateMachineConditions(machine, conditions) + } + + return clone, nil } - return clone, nil + return machine, nil } func (c *controller) updateMachineFinalizers(machine *v1alpha1.Machine, finalizers []string) { @@ -715,7 +726,7 @@ func (c *controller) checkMachineTimeout(machine *v1alpha1.Machine) { // Timeout value obtained by subtracting last operation with expected time out period timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time) if timeOut > 0 { - // If machine health timeout occurs while joining or rejoining of machine + // Machine health timeout occurs while joining or rejoining of machine if machine.Status.CurrentStatus.Phase == v1alpha1.MachinePending { // Timeout occurred while machine creation @@ -748,20 +759,14 @@ func (c *controller) checkMachineTimeout(machine *v1alpha1.Machine) { // Log the error message for machine failure glog.Error(description) + // Update the machine status to reflect the changes + c.updateMachineStatus(machine, lastOperation, currentStatus) + } else { // If timeout has not occurred, re-enqueue the machine // after a specified sleep time c.enqueueMachineAfter(machine, sleepTime) - currentStatus = v1alpha1.CurrentStatus{ - Phase: machine.Status.CurrentStatus.Phase, - TimeoutActive: true, - LastUpdateTime: machine.Status.CurrentStatus.LastUpdateTime, - } - lastOperation = machine.Status.LastOperation } - - // Update the machine status to reflect the changes - c.updateMachineStatus(machine, lastOperation, currentStatus) } } diff --git a/pkg/controller/machine_util.go b/pkg/controller/machine_util.go index 4c39596f7..e89e750ec 100644 --- a/pkg/controller/machine_util.go +++ b/pkg/controller/machine_util.go @@ -199,3 +199,19 @@ func (c *controller) validateMachineClass(classSpec *v1alpha1.ClassSpec) (interf return MachineClass, secretRef, nil } + +// nodeConditionsHaveChanged compares two node statuses to see if any of the statuses have changed +func nodeConditionsHaveChanged(machineConditions []v1.NodeCondition, nodeConditions []v1.NodeCondition) bool { + + if len(machineConditions) != len(nodeConditions) { + return true + } + + for i := range nodeConditions { + if nodeConditions[i].Status != machineConditions[i].Status { + return true + } + } + + return false +} diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 44bb2ee98..9f9b95b79 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -140,7 +140,7 @@ func (c *controller) machineSetUpdate(old, cur interface{}) { // that bad as MachineSets that haven't met expectations yet won't // sync, and all the listing is done using local stores. if oldMachineSet.Spec.Replicas != currentMachineSet.Spec.Replicas { - glog.V(4).Infof("%v %v updated. Desired machine count change: %d->%d", currentMachineSet.Name, oldMachineSet.Spec.Replicas, currentMachineSet.Spec.Replicas) + glog.V(4).Infof("%v updated. Desired machine count change: %d->%d", currentMachineSet.Name, oldMachineSet.Spec.Replicas, currentMachineSet.Spec.Replicas) } c.enqueueMachineSet(currentMachineSet) } From 01ffe67f1b49d1acd8f49ba4d87f0bd783b9d38d Mon Sep 17 00:00:00 2001 From: Prashanth Date: Thu, 16 Aug 2018 20:03:38 +0530 Subject: [PATCH 2/2] Comparing each failed machines for deployment updates --- pkg/controller/deployment_util.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pkg/controller/deployment_util.go b/pkg/controller/deployment_util.go index 4b4e6950b..049d8deec 100644 --- a/pkg/controller/deployment_util.go +++ b/pkg/controller/deployment_util.go @@ -1031,6 +1031,25 @@ func statusUpdateRequired(old v1alpha1.MachineDeploymentStatus, new v1alpha1.Mac old.UpdatedReplicas == new.UpdatedReplicas && reflect.DeepEqual(old.Conditions, new.Conditions) { // If all conditions are matching + + // Iterate through all new failed machines and check if there + // exists an older machine with same name and description + for _, newMachine := range new.FailedMachines { + found := false + + for _, oldMachine := range old.FailedMachines { + if oldMachine.Name == newMachine.Name && + oldMachine.LastOperation.Description == newMachine.LastOperation.Description { + found = true + continue + } + } + + if !found { + return true + } + } + return false }