Added ScaledObject Support For Downscaler/Downtime-Replicas Annotation #100

Merged (9 commits) on Oct 16, 2024
26 changes: 26 additions & 0 deletions README.md
@@ -37,6 +37,7 @@ Scale down / "pause" Kubernetes workload (`Deployments`, `StatefulSets`,
- [Scaling Jobs Natively](#scaling-jobs-natively)
- [Scaling Jobs With Admission Controller](#scaling-jobs-with-admission-controller)
- [Scaling DaemonSets](#scaling-daemonsets)
- [Scaling ScaledObjects](#scaling-scaledobjects)
- [Matching Labels Argument](#matching-labels-argument)
- [Namespace Defaults](#namespace-defaults)
- [Migrate From Codeberg](#migrate-from-codeberg)
@@ -594,6 +595,31 @@ The feature to scale DaemonSets can be very useful for reducing the base occupancy
1. Downtime Hours: Kube Downscaler will add to each targeted DaemonSet a node selector that cannot be satisfied: `kube-downscaler-non-existent=true`
2. Uptime Hours: Kube Downscaler will remove the `kube-downscaler-non-existent=true` node selector from each targeted DaemonSet
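
The two steps above can be sketched as follows (an illustrative helper operating on a DaemonSet pod spec as a plain dict; this is not part of the codebase):

```python
UNSATISFIABLE_SELECTOR = "kube-downscaler-non-existent"

def pause_daemonset(pod_spec: dict) -> None:
    # Downtime: add a node selector no node satisfies, so no pods are scheduled
    pod_spec.setdefault("nodeSelector", {})[UNSATISFIABLE_SELECTOR] = "true"

def resume_daemonset(pod_spec: dict) -> None:
    # Uptime: remove the selector so the DaemonSet schedules pods normally again
    pod_spec.get("nodeSelector", {}).pop(UNSATISFIABLE_SELECTOR, None)

spec = {}
pause_daemonset(spec)
assert spec["nodeSelector"] == {"kube-downscaler-non-existent": "true"}
resume_daemonset(spec)
assert spec["nodeSelector"] == {}
```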

### Scaling ScaledObjects

The ability to downscale ScaledObjects is very useful for workloads that use KEDA to support
a wider range of horizontal scaling metrics than the native Horizontal Pod Autoscaler (HPA).
KEDA provides a built-in way to disable ScaledObjects when they are not needed, via the
`"autoscaling.keda.sh/paused-replicas"` annotation.

During downtime periods, KubeDownscaler applies the `"autoscaling.keda.sh/paused-replicas"` annotation,
setting its value to what the user specifies through the KubeDownscaler argument `--downtime-replicas`
or the workload annotation `"downscaler/downtime-replicas"`. During uptime, KubeDownscaler removes the
`"autoscaling.keda.sh/paused-replicas"` annotation, allowing the ScaledObject to operate as originally configured.
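
In pseudo-code, this behaviour reduces to toggling a single annotation (a simplified sketch; the annotation name is real, the helper functions are illustrative):

```python
KEDA_PAUSE_ANNOTATION = "autoscaling.keda.sh/paused-replicas"

def enter_downtime(annotations: dict, downtime_replicas: int) -> None:
    # Downtime: pause the ScaledObject at the configured replica count
    annotations[KEDA_PAUSE_ANNOTATION] = str(downtime_replicas)

def enter_uptime(annotations: dict) -> None:
    # Uptime: drop the annotation so KEDA resumes autoscaling as configured
    annotations.pop(KEDA_PAUSE_ANNOTATION, None)

ann = {}
enter_downtime(ann, 0)
assert ann == {"autoscaling.keda.sh/paused-replicas": "0"}
enter_uptime(ann)
assert ann == {}
```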

**Important**: When using the `"downscaler/downtime-replicas"` annotation at the workload level, the
annotation must be included in both the ScaledObject and the Deployment or StatefulSet it controls,
and its value must match in both locations. Alternatively, it is possible to exclude the Deployment
or StatefulSet from scaling with the annotation `"downscaler/exclude"`, keeping downscaling active only
on the ScaledObject.

**Important**: KubeDownscaler automatically detects whether the `"autoscaling.keda.sh/paused-replicas"`
annotation is already present on the ScaledObject. If it is, KubeDownscaler will overwrite it
with the target value specified for downtime and, during uptime, will restore the original value.

**Technical Detail**: During downscaling, KubeDownscaler sets the annotation `"downscaler/original-replicas"` to `-1`.
This value acts as a placeholder indicating that the ScaledObject was active during uptime.
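
Combining the points above, the full downscale/upscale round trip can be sketched like this (simplified; the annotation keys match those used by KubeDownscaler, the functions themselves are illustrative):

```python
KEDA_PAUSE = "autoscaling.keda.sh/paused-replicas"
LAST_PAUSE = "downscaler/original-pause-replicas"
ORIGINAL_REPLICAS = "downscaler/original-replicas"

def scale_down(ann: dict, target_replicas: int) -> None:
    if ann.get(KEDA_PAUSE) is not None:
        # A pause value was already set by the user: remember it for later
        ann[LAST_PAUSE] = ann[KEDA_PAUSE]
    ann[ORIGINAL_REPLICAS] = "-1"  # placeholder: ScaledObject was active in uptime
    ann[KEDA_PAUSE] = str(target_replicas)

def scale_up(ann: dict) -> None:
    if LAST_PAUSE in ann:
        ann[KEDA_PAUSE] = ann.pop(LAST_PAUSE)  # restore the user's pause value
    else:
        ann.pop(KEDA_PAUSE, None)  # no previous pause: remove the annotation
    ann.pop(ORIGINAL_REPLICAS, None)

ann = {KEDA_PAUSE: "5"}  # the user had paused at 5 replicas
scale_down(ann, 0)
assert ann[KEDA_PAUSE] == "0" and ann[LAST_PAUSE] == "5"
scale_up(ann)
assert ann == {KEDA_PAUSE: "5"}
```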

### Matching Labels Argument

Labels, in Kubernetes, are key-value pairs that can be used to identify and group resources.
6 changes: 4 additions & 2 deletions kube_downscaler/resources/keda.py
@@ -12,16 +12,18 @@ class ScaledObject(NamespacedAPIObject):
    keda_pause_annotation = "autoscaling.keda.sh/paused-replicas"
    last_keda_pause_annotation_if_present = "downscaler/original-pause-replicas"

+    # If keda_pause_annotation is not present, return -1, which means the ScaledObject is active.
+    # Otherwise, return the number of replicas specified in keda_pause_annotation.
    @property
    def replicas(self):
        if ScaledObject.keda_pause_annotation in self.annotations:
            if self.annotations[ScaledObject.keda_pause_annotation] is None:
-                replicas = 1
+                replicas = -1
            elif self.annotations[ScaledObject.keda_pause_annotation] == "0":
                replicas = 0
            elif self.annotations[ScaledObject.keda_pause_annotation] != "0" and self.annotations[ScaledObject.keda_pause_annotation] is not None:
                replicas = int(self.annotations[ScaledObject.keda_pause_annotation])
        else:
-            replicas = 1
+            replicas = -1

        return replicas
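
The semantics of the revised property can be restated as a standalone function (an illustrative equivalent for reference, not the class itself):

```python
KEDA_PAUSE_ANNOTATION = "autoscaling.keda.sh/paused-replicas"

def scaledobject_replicas(annotations: dict) -> int:
    value = annotations.get(KEDA_PAUSE_ANNOTATION)
    if value is None:
        return -1  # annotation absent: the ScaledObject is active
    return int(value)  # annotation present: paused at this replica count

assert scaledobject_replicas({}) == -1
assert scaledobject_replicas({KEDA_PAUSE_ANNOTATION: "0"}) == 0
assert scaledobject_replicas({KEDA_PAUSE_ANNOTATION: "3"}) == 3
```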
24 changes: 20 additions & 4 deletions kube_downscaler/scaler.py
@@ -41,6 +41,11 @@
DOWNTIME_REPLICAS_ANNOTATION = "downscaler/downtime-replicas"
GRACE_PERIOD_ANNOTATION="downscaler/grace-period"

+# Max value of a 32-bit signed integer (Go int32). The value was chosen because 2147483647 is the max
+# allowed for Deployment/StatefulSet.spec.replicas. It is used to allow
+# ScaledObjects to support the "downscaler/downtime-replicas" annotation
+KUBERNETES_MAX_ALLOWED_REPLICAS = 2147483647

RESOURCE_CLASSES = [
Deployment,
StatefulSet,
@@ -406,6 +411,16 @@ def get_replicas(
        logger.debug(
            f"{resource.kind} {resource.namespace}/{resource.name} is {state} (original: {original_state}, uptime: {uptime})"
        )
+    elif resource.kind == "ScaledObject":
+        replicas = resource.replicas
+        if replicas == KUBERNETES_MAX_ALLOWED_REPLICAS + 1:
+            logger.debug(
+                f"{resource.kind} {resource.namespace}/{resource.name} is not suspended (uptime: {uptime})"
+            )
+        else:
+            logger.debug(
+                f"{resource.kind} {resource.namespace}/{resource.name} is suspended (uptime: {uptime})"
+            )
    else:
        replicas = resource.replicas
        logger.debug(
@@ -665,7 +680,7 @@ def scale_down(
        if resource.annotations[ScaledObject.keda_pause_annotation] is not None:
            paused_replicas = resource.annotations[ScaledObject.keda_pause_annotation]
            resource.annotations[ScaledObject.last_keda_pause_annotation_if_present] = paused_replicas
-        resource.annotations[ScaledObject.keda_pause_annotation] = "0"
+        resource.annotations[ScaledObject.keda_pause_annotation] = str(target_replicas)
        logger.info(
            f"Pausing {resource.kind} {resource.namespace}/{resource.name} (uptime: {uptime}, downtime: {downtime})"
        )
@@ -929,7 +944,7 @@ def autoscale_resource(
        and is_uptime
        and replicas == downtime_replicas
        and original_replicas
-        and original_replicas > 0
+        and (original_replicas > 0 or original_replicas == -1)
    ):
        scale_up(
            resource,
@@ -944,8 +959,9 @@
    elif (
        not ignore
        and not is_uptime
-        and replicas > 0
-        and replicas > downtime_replicas
+        and (replicas > 0
+        and replicas > downtime_replicas
+        or replicas == -1)
    ):
        if within_grace_period(
            resource, grace_period, now, deployment_time_annotation
Expand Down
2 changes: 1 addition & 1 deletion tests/test_autoscale_resource.py
@@ -1304,4 +1304,4 @@ def test_upscale_scaledobject_without_keda_pause_annotation():
    # Check if the annotations have been correctly updated for the upscale operation
    assert so.annotations[ScaledObject.keda_pause_annotation] is None
    assert so.annotations.get(ScaledObject.last_keda_pause_annotation_if_present) is None
-    assert so.replicas == 1
+    assert so.replicas == -1
2 changes: 1 addition & 1 deletion tests/test_resources.py
@@ -51,7 +51,7 @@ def test_scaledobject():
    scalable_mock = {"metadata": {}}
    api_mock.obj = MagicMock(name="APIObjMock")
    d = ScaledObject(api_mock, scalable_mock)
-    assert d.replicas == 1
+    assert d.replicas == -1
    d.annotations[ScaledObject.keda_pause_annotation] = "0"
    assert d.replicas == 0
