From 029dd7ec2c5d714fd1e0bf40e4ac85b13bf2c29c Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 9 Aug 2023 11:19:35 -0700 Subject: [PATCH] [RayService] Revisit the conditions under which a RayService is considered unhealthy and the default threshold (#1293) Revisit the conditions under which a RayService is considered unhealthy and the default threshold --- .../controllers/ray/rayservice_controller.go | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/ray-operator/controllers/ray/rayservice_controller.go b/ray-operator/controllers/ray/rayservice_controller.go index 990b3a826cb..68158ad7eeb 100644 --- a/ray-operator/controllers/ray/rayservice_controller.go +++ b/ray-operator/controllers/ray/rayservice_controller.go @@ -734,8 +734,8 @@ func (r *RayServiceReconciler) updateServeDeployment(ctx context.Context, raySer // // (1) `isHealthy` is used to determine whether restart the RayCluster or not. // (2) `isReady` is used to determine whether the Serve applications in the RayCluster are ready to serve incoming traffic or not. -// (3) `err`: If `err` is not nil, it means that KubeRay failed to get Serve application statuses from the dashboard agent. We should take a look at dashboard agent rather than Ray Serve applications. - +// (3) `err`: If `err` is not nil, it means that KubeRay failed to get Serve application statuses from the dashboard agent. We should take a +// look at dashboard agent rather than Ray Serve applications. func (r *RayServiceReconciler) getAndCheckServeStatus(ctx context.Context, dashboardClient utils.RayDashboardClientInterface, rayServiceServeStatus *rayv1alpha1.RayServiceStatus, serveConfigType utils.RayServeConfigType, unhealthySecondThreshold *int32) (bool, bool, error) { // If the `unhealthySecondThreshold` value is non-nil, then we will use that value. Otherwise, we will use the value ServiceUnhealthySecondThreshold // which can be set in a test. This is used for testing purposes. @@ -1162,24 +1162,14 @@ func (r *RayServiceReconciler) labelHealthyServePods(ctx context.Context, rayClu if pod.Labels == nil { pod.Labels = make(map[string]string) } - - // Make a copy of the labels for comparison later, to decide whether we need to push an update. - originalLabels := make(map[string]string, len(pod.Labels)) - for key, value := range pod.Labels { - originalLabels[key] = value - } - if httpProxyClient.CheckHealth() == nil { pod.Labels[common.RayClusterServingServiceLabelKey] = common.EnableRayClusterServingServiceTrue } else { pod.Labels[common.RayClusterServingServiceLabelKey] = common.EnableRayClusterServingServiceFalse } - - if !reflect.DeepEqual(originalLabels, pod.Labels) { - if updateErr := r.Update(ctx, &pod); updateErr != nil { - r.Log.Error(updateErr, "Pod label Update error!", "Pod.Error", updateErr) - return updateErr - } + if updateErr := r.Update(ctx, &pod); updateErr != nil { + r.Log.Error(updateErr, "Pod label Update error!", "Pod.Error", updateErr) + return updateErr } }