From 9a628c563ec08d900ef8e161e96243a5d98c322d Mon Sep 17 00:00:00 2001 From: Andrei Nesterov Date: Fri, 9 Jun 2023 17:45:04 +0300 Subject: [PATCH] Make replicas optional for the worker spec The replicas field must not be set when autoscaler is used because: - Autoscaler updates the field. - When GitOps systems such as FluxCD or ArgoCD are used, they keep reverting changes made by the autoscaler on every reconciliation. --- .../python_client/utils/kuberay_cluster_builder.py | 4 ++-- helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml | 1 - helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml | 1 - helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml | 1 - ray-operator/apis/ray/v1alpha1/raycluster_types.go | 2 +- ray-operator/config/crd/bases/ray.io_rayclusters.yaml | 1 - ray-operator/config/crd/bases/ray.io_rayjobs.yaml | 1 - ray-operator/config/crd/bases/ray.io_rayservices.yaml | 1 - ray-operator/config/samples/ray-cluster.autoscaler.large.yaml | 3 +-- ray-operator/config/samples/ray-cluster.autoscaler.tls.yaml | 3 +-- ray-operator/config/samples/ray-cluster.autoscaler.yaml | 3 +-- ray-operator/config/samples/ray-service.autoscaler.yaml | 3 +-- 12 files changed, 7 insertions(+), 17 deletions(-) diff --git a/clients/python-client/python_client/utils/kuberay_cluster_builder.py b/clients/python-client/python_client/utils/kuberay_cluster_builder.py index 21e829f53b1..a203577b2b8 100644 --- a/clients/python-client/python_client/utils/kuberay_cluster_builder.py +++ b/clients/python-client/python_client/utils/kuberay_cluster_builder.py @@ -125,7 +125,7 @@ def build_worker( memory_requests: str = "1G", cpu_limits: str = "2", memory_limits: str = "2G", - replicas: int = 1, + replicas: int = 0, min_replicas: int = -1, max_replicas: int = -1, ray_start_params: dict = {}, @@ -143,7 +143,7 @@ def build_worker( - memory_requests (str, optional): Memory requests for the worker pods. Default is "1G". - cpu_limits (str, optional): CPU limits for the worker pods. Default is "2". - memory_limits (str, optional): Memory limits for the worker pods. Default is "2G". - - replicas (int, optional): Number of worker pods to run. Default is 1. + - replicas (int, optional): Number of worker pods to run. Default is 0. - min_replicas (int, optional): Minimum number of worker pods to run. Default is -1. - max_replicas (int, optional): Maximum number of worker pods to run. Default is -1. - ray_start_params (dict, optional): Additional parameters to pass to the ray start command. Default is {}. diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml index 81cdc698648..73e3da6a3ef 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml @@ -11568,7 +11568,6 @@ spec: - maxReplicas - minReplicas - rayStartParams - - replicas - template type: object type: array diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml index afb789c5881..2b6a723f501 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml @@ -12082,7 +12082,6 @@ spec: - maxReplicas - minReplicas - rayStartParams - - replicas - template type: object type: array diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml index 25ba5137995..7adf173b1e7 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml @@ -12068,7 +12068,6 @@ spec: - maxReplicas - minReplicas - rayStartParams - - replicas - template type: object type: array diff --git a/ray-operator/apis/ray/v1alpha1/raycluster_types.go b/ray-operator/apis/ray/v1alpha1/raycluster_types.go index 03aac6bf872..5597dce2783 100644 --- a/ray-operator/apis/ray/v1alpha1/raycluster_types.go +++ b/ray-operator/apis/ray/v1alpha1/raycluster_types.go @@ -47,7 +47,7 @@ type WorkerGroupSpec struct { GroupName string `json:"groupName"` // Replicas Number of desired pods in this pod group. This is a pointer to distinguish between explicit // zero and not specified. Defaults to 1. - Replicas *int32 `json:"replicas"` + Replicas *int32 `json:"replicas,omitempty"` // MinReplicas defaults to 1 MinReplicas *int32 `json:"minReplicas"` // MaxReplicas defaults to maxInt32 diff --git a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml index 81cdc698648..73e3da6a3ef 100644 --- a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml @@ -11568,7 +11568,6 @@ spec: - maxReplicas - minReplicas - rayStartParams - - replicas - template type: object type: array diff --git a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml index afb789c5881..2b6a723f501 100644 --- a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml @@ -12082,7 +12082,6 @@ spec: - maxReplicas - minReplicas - rayStartParams - - replicas - template type: object type: array diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml index 25ba5137995..7adf173b1e7 100644 --- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml @@ -12068,7 +12068,6 @@ spec: - maxReplicas - minReplicas - rayStartParams - - replicas - template type: object type: array diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml index fbd7ff94478..e0323622476 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml @@ -97,8 +97,7 @@ spec: command: ["/bin/sh","-c","ray stop"] workerGroupSpecs: # the pod replicas in this group typed worker - - replicas: 1 - minReplicas: 1 + - minReplicas: 1 maxReplicas: 10 # logical group name, for this called large-group, also can be functional groupName: large-group diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.tls.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.tls.yaml index 65dc2cc9a3c..a51a8f82625 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.tls.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler.tls.yaml @@ -145,8 +145,7 @@ spec: path: gencert_head.sh workerGroupSpecs: # the pod replicas in this group typed worker - - replicas: 1 - minReplicas: 1 + - minReplicas: 1 maxReplicas: 10 groupName: small-group # The `rayStartParams` are used to configure the `ray start` command. diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.yaml index d0d4c9cfb45..dcf3375f19f 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler.yaml @@ -98,8 +98,7 @@ spec: memory: "2G" workerGroupSpecs: # the pod replicas in this group typed worker - - replicas: 1 - minReplicas: 1 + - minReplicas: 1 maxReplicas: 10 # logical group name, for this called small-group, also can be functional groupName: small-group diff --git a/ray-operator/config/samples/ray-service.autoscaler.yaml b/ray-operator/config/samples/ray-service.autoscaler.yaml index cd9c10348e6..a4603e1b700 100644 --- a/ray-operator/config/samples/ray-service.autoscaler.yaml +++ b/ray-operator/config/samples/ray-service.autoscaler.yaml @@ -83,8 +83,7 @@ spec: name: serve workerGroupSpecs: # the pod replicas in this group typed worker - - replicas: 1 - minReplicas: 1 + - minReplicas: 1 maxReplicas: 5 # logical group name, for this called small-group, also can be functional groupName: small-group