Skip to content

Commit

Permalink
feature: Add environment variable controlling the log grooming frequency (apache#46237)
Browse files Browse the repository at this point in the history

* New feature: Add env variable controlling the log grooming frequency

* pre-commit

* fix: copy+paste errors, adjust test

* format
  • Loading branch information
stefankeidel authored Feb 13, 2025
1 parent f50f1ce commit 389ae2e
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 8 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -1271,10 +1271,11 @@ set -euo pipefail

readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"

trap "exit" INT TERM

readonly EVERY=$((15*60))
readonly EVERY=$((FREQUENCY*60))

echo "Cleaning logs every $EVERY seconds"

Expand Down
4 changes: 4 additions & 0 deletions chart/templates/dag-processor/dag-processor-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ spec:
{{- if .Values.dagProcessor.logGroomerSidecar.retentionDays }}
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.dagProcessor.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/scheduler/scheduler-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ spec:
{{- if .Values.scheduler.logGroomerSidecar.retentionDays }}
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.scheduler.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.scheduler.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.scheduler.logGroomerSidecar.frequencyMinutes }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/triggerer/triggerer-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ spec:
{{- if .Values.triggerer.logGroomerSidecar.retentionDays }}
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.triggerer.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.triggerer.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.triggerer.logGroomerSidecar.frequencyMinutes }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/workers/worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,10 @@ spec:
{{- if .Values.workers.logGroomerSidecar.retentionDays }}
- name: AIRFLOW__LOG_RETENTION_DAYS
value: "{{ .Values.workers.logGroomerSidecar.retentionDays }}"
{{- end }}
{{- if .Values.workers.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.workers.logGroomerSidecar.frequencyMinutes }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
Expand Down
5 changes: 5 additions & 0 deletions chart/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -12205,6 +12205,11 @@
"type": "integer",
"default": 15
},
"frequencyMinutes": {
"description": "Number of minutes between attempts to groom the Airflow logs in log groomer sidecar.",
"type": "integer",
"default": 15
},
"env": {
"description": "Add additional env vars to log groomer sidecar container (templated).",
"items": {
Expand Down
8 changes: 8 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,8 @@ workers:
args: ["bash", "/clean-logs"]
# Number of days to retain logs
retentionDays: 15
# How often to attempt to groom logs, in minutes
frequencyMinutes: 15
resources: {}
# limits:
# cpu: 100m
Expand Down Expand Up @@ -1026,6 +1028,8 @@ scheduler:
args: ["bash", "/clean-logs"]
# Number of days to retain logs
retentionDays: 15
# How often to attempt to groom logs, in minutes
frequencyMinutes: 15
resources: {}
# limits:
# cpu: 100m
Expand Down Expand Up @@ -1739,6 +1743,8 @@ triggerer:
args: ["bash", "/clean-logs"]
# Number of days to retain logs
retentionDays: 15
# How often to attempt to groom logs, in minutes
frequencyMinutes: 15
resources: {}
# limits:
# cpu: 100m
Expand Down Expand Up @@ -1925,6 +1931,8 @@ dagProcessor:
args: ["bash", "/clean-logs"]
# Number of days to retain logs
retentionDays: 15
# How often to attempt to groom logs, in minutes
frequencyMinutes: 15
resources: {}
# limits:
# cpu: 100m
Expand Down
3 changes: 2 additions & 1 deletion scripts/docker/clean-logs.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ set -euo pipefail

readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"

trap "exit" INT TERM

readonly EVERY=$((15*60))
readonly EVERY=$((FREQUENCY*60))

echo "Cleaning logs every $EVERY seconds"

Expand Down
43 changes: 37 additions & 6 deletions tests/charts/log_groomer.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,42 @@ def test_log_groomer_retention_days_overrides(self, retention_days, retention_re

if retention_result:
assert (
jmespath.search("spec.template.spec.containers[1].env[0].name", docs[0])
== "AIRFLOW__LOG_RETENTION_DAYS"
jmespath.search(
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_RETENTION_DAYS'].value | [0]",
docs[0],
)
== retention_result
)
assert retention_result == jmespath.search(
"spec.template.spec.containers[1].env[0].value", docs[0]
else:
assert len(jmespath.search("spec.template.spec.containers[1].env", docs[0])) == 2

@pytest.mark.parametrize("frequency_minutes, frequency_result", [(None, None), (20, "20")])
def test_log_groomer_frequency_minutes_overrides(self, frequency_minutes, frequency_result):
if self.obj_name == "dag-processor":
values = {
"dagProcessor": {
"enabled": True,
"logGroomerSidecar": {"frequencyMinutes": frequency_minutes},
}
}
else:
values = {f"{self.folder}": {"logGroomerSidecar": {"frequencyMinutes": frequency_minutes}}}

docs = render_chart(
values=values,
show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
)

if frequency_result:
assert (
jmespath.search(
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES'].value | [0]",
docs[0],
)
== frequency_result
)
else:
assert len(jmespath.search("spec.template.spec.containers[1].env", docs[0])) == 1
assert len(jmespath.search("spec.template.spec.containers[1].env", docs[0])) == 2

def test_log_groomer_resources(self):
if self.obj_name == "dag-processor":
Expand Down Expand Up @@ -242,4 +270,7 @@ def test_log_groomer_has_airflow_home(self):
values=values, show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"]
)

assert jmespath.search("spec.template.spec.containers[1].env[1].name", docs[0]) == "AIRFLOW_HOME"
assert (
jmespath.search("spec.template.spec.containers[1].env[?name=='AIRFLOW_HOME'].name | [0]", docs[0])
== "AIRFLOW_HOME"
)

0 comments on commit 389ae2e

Please sign in to comment.