From 5f6b21a992708255950d21b567c332c74934328b Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Date: Wed, 8 Jun 2022 16:49:30 -0700 Subject: [PATCH] Set autoscaler imagePullPolicy to IfNotPresent. Provide override. (#297) This PR exposes the autoscaler container pull policy in the Ray CRD for testing purposes. --- ray-operator/apis/ray/v1alpha1/raycluster_types.go | 2 ++ ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go | 5 +++++ ray-operator/config/crd/bases/ray.io_rayclusters.yaml | 4 ++++ ray-operator/controllers/ray/common/pod.go | 3 +++ ray-operator/controllers/ray/common/pod_test.go | 7 +++++-- 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ray-operator/apis/ray/v1alpha1/raycluster_types.go b/ray-operator/apis/ray/v1alpha1/raycluster_types.go index 1e174779d28..d57d9d09c0b 100644 --- a/ray-operator/apis/ray/v1alpha1/raycluster_types.go +++ b/ray-operator/apis/ray/v1alpha1/raycluster_types.go @@ -71,6 +71,8 @@ type AutoscalerOptions struct { Resources *v1.ResourceRequirements `json:"resources,omitempty"` // Image optionally overrides the autoscaler's container image. This override is for provided for autoscaler testing and development. Image *string `json:"image,omitempty"` + // ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is provided for autoscaler testing and development. + ImagePullPolicy *v1.PullPolicy `json:"imagePullPolicy,omitempty"` // IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources. // Defaults to 300 (five minutes). 
IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` diff --git a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go index 1bca8047214..d8531f10f4a 100644 --- a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go @@ -23,6 +23,11 @@ func (in *AutoscalerOptions) DeepCopyInto(out *AutoscalerOptions) { *out = new(string) **out = **in } + if in.ImagePullPolicy != nil { + in, out := &in.ImagePullPolicy, &out.ImagePullPolicy + *out = new(v1.PullPolicy) + **out = **in + } if in.IdleTimeoutSeconds != nil { in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds *out = new(int32) diff --git a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml index 43d19a56ece..549ef6ace3a 100644 --- a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml @@ -47,6 +47,10 @@ spec: description: Image optionally overrides the autoscaler's container image. type: string + imagePullPolicy: + description: ImagePullPolicy optionally overrides the autoscaler + container's image pull policy. + type: string resources: description: Resources specifies resource requests and limits for the autoscaler container. 
diff --git a/ray-operator/controllers/ray/common/pod.go b/ray-operator/controllers/ray/common/pod.go index 0b47b6cf465..ccef9bacaca 100644 --- a/ray-operator/controllers/ray/common/pod.go +++ b/ray-operator/controllers/ray/common/pod.go @@ -223,6 +223,9 @@ func mergeAutoscalerOverrides(autoscalerContainer *v1.Container, autoscalerOptio if autoscalerOptions.Image != nil { autoscalerContainer.Image = *autoscalerOptions.Image } + if autoscalerOptions.ImagePullPolicy != nil { + autoscalerContainer.ImagePullPolicy = *autoscalerOptions.ImagePullPolicy + } } } diff --git a/ray-operator/controllers/ray/common/pod_test.go b/ray-operator/controllers/ray/common/pod_test.go index ef94665298f..d613c450509 100644 --- a/ray-operator/controllers/ray/common/pod_test.go +++ b/ray-operator/controllers/ray/common/pod_test.go @@ -344,8 +344,9 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) { svcName := utils.GenerateServiceName(cluster.Name) customAutoscalerImage := "custom-autoscaler-xxx" + customPullPolicy := v1.PullIfNotPresent customTimeout := int32(100) - customUpscaling := "Aggressive" + customUpscaling := rayiov1alpha1.UpscalingMode("Aggressive") customResources := v1.ResourceRequirements{ Requests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("1"), @@ -358,15 +359,17 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) { } cluster.Spec.AutoscalerOptions = &rayiov1alpha1.AutoscalerOptions{ - UpscalingMode: (*rayiov1alpha1.UpscalingMode)(&customUpscaling), + UpscalingMode: &customUpscaling, IdleTimeoutSeconds: &customTimeout, Image: &customAutoscalerImage, + ImagePullPolicy: &customPullPolicy, Resources: &customResources, } podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, svcName) pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, &trueFlag) expectedContainer := *autoscalerContainer.DeepCopy() expectedContainer.Image = customAutoscalerImage + 
expectedContainer.ImagePullPolicy = customPullPolicy expectedContainer.Resources = customResources index := getAutoscalerContainerIndex(pod) actualContainer := pod.Spec.Containers[index]