From c51568b12522b33192a9e9399702eb7705836170 Mon Sep 17 00:00:00 2001
From: Hui Song
Date: Fri, 17 Jan 2025 13:32:53 -0500
Subject: [PATCH] update log time to remove timestamp

---
 ...ebookprocesslog_log_created_at_and_more.py | 114 +++++++++++++++
 src/aap_eda/core/models/rulebook_process.py   |   1 +
 .../core/utils/rulebook_process_logs.py       |  73 ++++++++++
 .../services/activation/db_log_handler.py     |  10 +-
 .../api/test_activation_instance.py           |   1 +
 .../core/test_rulebook_process_logs.py        |  64 +++++++++
 .../activation/engine/test_kubernetes.py      |   2 +-
 .../services/activation/engine/test_podman.py |   3 +-
 .../unit/migrations/test_parse_log_entries.py | 136 ++++++++++++++++++
 9 files changed, 397 insertions(+), 7 deletions(-)
 create mode 100644 src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py
 create mode 100644 src/aap_eda/core/utils/rulebook_process_logs.py
 create mode 100644 tests/integration/core/test_rulebook_process_logs.py
 create mode 100644 tests/unit/migrations/test_parse_log_entries.py

diff --git a/src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py b/src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py
new file mode 100644
index 000000000..6c12c34ef
--- /dev/null
+++ b/src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py
@@ -0,0 +1,114 @@
+# Generated by Django 4.2.16 on 2025-01-23 05:05
+
+import logging
+
+from django.db import migrations, models
+
+from aap_eda.core.utils.rulebook_process_logs import (
+    extract_datetime_and_message_from_log_entry,
+)
+
+logger = logging.getLogger(__name__)
+
+CHUNK_SIZE = 1000
+
+
+def up_log_entries(apps, schema_editor):
+    rulebook_process_log_model = apps.get_model("core", "RulebookProcessLog")
+
+    updated_entries = []
+    corrupted_entries = []
+    total_records = 0
+
+    for entry in rulebook_process_log_model.objects.iterator(
+        chunk_size=CHUNK_SIZE
+    ):
+        try:
+            dt, message = extract_datetime_and_message_from_log_entry(
+                entry.log
+            )
+            entry.log_created_at = dt
+            entry.log = message
+
+            updated_entries.append(entry)
+        except Exception as ex:
+            corrupted_entries.append(entry)
+            logger.warning(
+                f"Skipping entry {entry.id} due to parse error: {ex}"
+            )
+
+        # Flush in CHUNK_SIZE batches in case of a large number of records
+        if len(updated_entries) >= CHUNK_SIZE:
+            rulebook_process_log_model.objects.bulk_update(
+                updated_entries, ["log", "log_created_at"]
+            )
+            total_records += len(updated_entries)
+            logger.info(f"{total_records} entries parsed so far.")
+            updated_entries = []
+
+    # Flush the last chunk
+    if updated_entries:
+        rulebook_process_log_model.objects.bulk_update(
+            updated_entries, ["log", "log_created_at"]
+        )
+        total_records += len(updated_entries)
+
+    logger.info(f"A total of {total_records} entries were parsed.")
+    if corrupted_entries:
+        logger.info(f"{len(corrupted_entries)} entries were skipped.")
+
+
+def down_log_entries(apps, schema_editor):
+    rulebook_process_log_model = apps.get_model("core", "RulebookProcessLog")
+
+    updated_entries = []
+    total_records = 0
+
+    for entry in rulebook_process_log_model.objects.iterator(
+        chunk_size=CHUNK_SIZE
+    ):
+        try:
+            if entry.log_created_at:
+                timestamp_str = entry.log_created_at.strftime(
+                    "%Y-%m-%d %H:%M:%S,%f"
+                ).rstrip("0")
+                message = f"{timestamp_str} {entry.log}"
+                entry.log = message
+                updated_entries.append(entry)
+        except ValueError as err:
+            logger.warning(
+                f"Skipping reversal of entry {entry.id} due to error: {err}"
+            )
+
+        # Flush in CHUNK_SIZE batches in case of a large number of records
+        if len(updated_entries) >= CHUNK_SIZE:
+            rulebook_process_log_model.objects.bulk_update(
+                updated_entries, ["log"]
+            )
+            total_records += len(updated_entries)
+            logger.info(f"{total_records} entries reversed so far.")
+            updated_entries = []
+
+    # Flush the last chunk
+    if updated_entries:
+        rulebook_process_log_model.objects.bulk_update(
+            updated_entries, ["log"]
+        )
+        total_records += len(updated_entries)
+
+    logger.info(f"A total of {total_records} entries were reversed.")
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("core", "0055_activation_created_by_activation_modified_by_and_more"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="rulebookprocesslog",
+            name="log_created_at",
+            field=models.DateTimeField(null=True),
+        ),
+        migrations.RunPython(up_log_entries, down_log_entries),
+    ]
diff --git a/src/aap_eda/core/models/rulebook_process.py b/src/aap_eda/core/models/rulebook_process.py
index bcda9904e..350701c08 100644
--- a/src/aap_eda/core/models/rulebook_process.py
+++ b/src/aap_eda/core/models/rulebook_process.py
@@ -192,6 +192,7 @@ class Meta:
     )
     log = models.TextField()
     log_timestamp = models.BigIntegerField(null=False, default=0)
+    log_created_at = models.DateTimeField(null=True)
 
 
 class RulebookProcessQueue(models.Model):
diff --git a/src/aap_eda/core/utils/rulebook_process_logs.py b/src/aap_eda/core/utils/rulebook_process_logs.py
new file mode 100644
index 000000000..f17729f83
--- /dev/null
+++ b/src/aap_eda/core/utils/rulebook_process_logs.py
@@ -0,0 +1,73 @@
+# Copyright 2025 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+from datetime import datetime
+from typing import Optional, Tuple
+
+from django.utils.timezone import make_aware
+
+
+def extract_datetime_and_message_from_log_entry(
+    log_entry: str,
+) -> Tuple[Optional[datetime], str]:
+    """
+    Extract the timestamp and message parts from a log entry.
+
+    Supported timestamp patterns:
+    - "%Y-%m-%d %H:%M:%S,%f"
+    - "%Y-%m-%d %H:%M:%S.%f"
+    - "%Y-%m-%d %H:%M:%S %f"
+    - "** %Y-%m-%d %H:%M:%S.%f"
+
+    Parameters:
+        log_entry (str): The log entry string.
+
+    Returns:
+        Tuple[Optional[datetime], str]: A tuple containing the extracted
+        timestamp as a datetime object (or None if no valid timestamp is
+        found) and the message as a string.
+    """
+    timestamp_patterns = [
+        r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:[,\. ]\d{1,6}) ",
]\d{1,6}) ", + r"^\*\* \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6} ", + ] + datetime_formats = [ + "%Y-%m-%d %H:%M:%S,%f", + "%Y-%m-%d %H:%M:%S.%f", + "%Y-%m-%d %H:%M:%S %f", + ] + + for pattern in timestamp_patterns: + match = re.match(pattern, log_entry) + if match: + # Extract the timestamp string + timestamp_str = ( + match.group(0).strip("* ").strip() + ) # Remove "**" if present + message = log_entry[ + match.end() : + ].strip() # Extract the remaining message + if message.startswith("- "): + message = message[2:] # Remove ' - ' characters + + # Convert the timestamp string into a datetime object + for fmt in datetime_formats: + try: + timestamp_obj = datetime.strptime(timestamp_str, fmt) + return make_aware(timestamp_obj), message + except ValueError: + continue + + return None, log_entry diff --git a/src/aap_eda/services/activation/db_log_handler.py b/src/aap_eda/services/activation/db_log_handler.py index 44dc9787c..beef7d7d0 100644 --- a/src/aap_eda/services/activation/db_log_handler.py +++ b/src/aap_eda/services/activation/db_log_handler.py @@ -20,6 +20,9 @@ from django.db import IntegrityError from aap_eda.core import models +from aap_eda.core.utils.rulebook_process_logs import ( + extract_datetime_and_message_from_log_entry, +) from aap_eda.services.activation.engine.common import LogHandler from aap_eda.services.activation.engine.exceptions import ( ContainerUpdateLogsError, @@ -59,15 +62,14 @@ def write( lines = [lines] for line in lines: - if timestamp: - dt = f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S,%f')[:-3]}" - line = f"{dt} {line}" + dt, message = extract_datetime_and_message_from_log_entry(line) self.activation_instance_log_buffer.append( models.RulebookProcessLog( - log=line, + log=message, activation_instance_id=self.activation_instance_id, log_timestamp=log_timestamp, + log_created_at=dt, ) ) self.line_count += 1 diff --git a/tests/integration/api/test_activation_instance.py b/tests/integration/api/test_activation_instance.py index 028ba5f3e..07562e09a 100644 --- a/tests/integration/api/test_activation_instance.py +++ b/tests/integration/api/test_activation_instance.py @@ -111,6 +111,7 @@ def test_list_logs_from_activation_instance( "id", "log", "log_timestamp", + "log_created_at", "activation_instance", ] diff --git a/tests/integration/core/test_rulebook_process_logs.py b/tests/integration/core/test_rulebook_process_logs.py new file mode 100644 index 000000000..ab89565c7 --- /dev/null +++ b/tests/integration/core/test_rulebook_process_logs.py @@ -0,0 +1,64 @@ +# Copyright 2025 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from datetime import datetime
+
+import pytest
+from django.utils.timezone import make_aware
+
+from aap_eda.core.utils.rulebook_process_logs import (
+    extract_datetime_and_message_from_log_entry,
+)
+
+
+@pytest.mark.parametrize(
+    ("log_entry", "expected_dt", "expected_msg"),
+    [
+        (
+            "2025-01-17 18:39:49,191 Starting Container",
+            make_aware(datetime(2025, 1, 17, 18, 39, 49, 191000)),
+            "Starting Container",
+        ),
+        (
+            "2025-01-17 18:39:55,215 - ansible_rulebook.action.run_job_template",  # noqa: E501
+            make_aware(datetime(2025, 1, 17, 18, 39, 55, 215000)),
+            "ansible_rulebook.action.run_job_template",
+        ),
+        (
+            "** 2025-01-17 18:39:55.222773 [debug] ****",
+            make_aware(datetime(2025, 1, 17, 18, 39, 55, 222773)),
+            "[debug] ****",
+        ),
+        (
+            "2025-01-17 18:43:38 638 [main] DEBUG org.drools.ansible",
+            make_aware(datetime(2025, 1, 17, 18, 43, 38, 638000)),
+            "[main] DEBUG org.drools.ansible",
+        ),
+        (
+            "Ruleset: Long Running Range",
+            None,
+            "Ruleset: Long Running Range",
+        ),
+    ],
+)
+@pytest.mark.django_db
+def test_extract_datetimes(
+    log_entry: str,
+    expected_dt,
+    expected_msg: str,
+):
+    dt, message = extract_datetime_and_message_from_log_entry(log_entry)
+
+    assert dt == expected_dt
+    assert message == expected_msg
diff --git a/tests/integration/services/activation/engine/test_kubernetes.py b/tests/integration/services/activation/engine/test_kubernetes.py
index 90f7bb1f5..2076d6dac 100644
--- a/tests/integration/services/activation/engine/test_kubernetes.py
+++ b/tests/integration/services/activation/engine/test_kubernetes.py
@@ -612,7 +612,7 @@ def test_update_logs(init_kubernetes_data, kubernetes_engine):
     ):
         pod_mock.status.container_statuses = get_pod_statuses("running")
         log_mock = mock.Mock()
-        message = "2023-10-30 19:18:48,375 INFO Result is kept for 500 seconds"
+        message = "INFO Result is kept for 500 seconds"
         with mock.patch.object(engine.client, "core_api") as core_api_mock:
             core_api_mock.read_namespaced_pod_log.return_value = log_mock
             log_mock.splitlines.return_value = [
diff --git a/tests/integration/services/activation/engine/test_podman.py b/tests/integration/services/activation/engine/test_podman.py
index f7a8dcfa6..f8f85a6c8 100644
--- a/tests/integration/services/activation/engine/test_podman.py
+++ b/tests/integration/services/activation/engine/test_podman.py
@@ -645,8 +645,7 @@ def test_engine_update_logs(init_podman_data, podman_engine):
     log_handler = DBLogger(init_podman_data.activation_instance.id)
     init_log_read_at = init_podman_data.activation_instance.log_read_at
     message = (
-        "2023-10-31 15:28:01,318 - ansible_rulebook.engine - INFO - "
-        "Calling main in ansible.eda.range"
+        "ansible_rulebook.engine - INFO - Calling main in ansible.eda.range"
     )
 
     container_mock = mock.Mock()
diff --git a/tests/unit/migrations/test_parse_log_entries.py b/tests/unit/migrations/test_parse_log_entries.py
new file mode 100644
index 000000000..39b866a6d
--- /dev/null
+++ b/tests/unit/migrations/test_parse_log_entries.py
@@ -0,0 +1,136 @@
+# Copyright 2025 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+from typing import Type
+
+import pytest
+from django.core.management import call_command
+from django.db import connection
+from django.db.migrations.executor import MigrationExecutor
+from django.db.models import Model
+from django.utils.timezone import make_aware
+
+from aap_eda.core import models
+
+TEST_DATA = [
+    {
+        "msg": "2025-01-17 18:39:49,191 Starting Container",
+        "dt": make_aware(datetime(2025, 1, 17, 18, 39, 49, 191000)),
+        "log": "Starting Container",
+    },
+    {
+        "msg": "** 2025-01-17 18:39:55.222773 [debug] ****",
+        "dt": make_aware(datetime(2025, 1, 17, 18, 39, 55, 222773)),
+        "log": "[debug] ****",
+    },
+    {
+        "msg": "2025-01-17 18:43:38 638 [main] DEBUG org.drools.ansible",
+        "dt": make_aware(datetime(2025, 1, 17, 18, 43, 38, 638000)),
+        "log": "[main] DEBUG org.drools.ansible",
+    },
+]
+
+
+def get_historical_model(
+    app_label: str, model_name: str, migration_name: str
+) -> Type[Model]:
+    """
+    Retrieve the historical model as it existed at the specified migration.
+    """
+    executor = MigrationExecutor(connection)
+
+    # Load the project state at the specified migration
+    state = executor.loader.project_state((app_label, migration_name))
+
+    return state.apps.get_model(app_label, model_name)
+
+
+@pytest.fixture
+def dummy_rulebook_process(default_organization):
+    password = "secret"
+    user = models.User.objects.create(
+        username="tester",
+        password=password,
+        first_name="Adam",
+        last_name="Tester",
+        email="adam@example.com",
+    )
+    activation = models.Activation.objects.create(
+        name="test-activation",
+        user=user,
+        organization=default_organization,
+    )
+    return models.RulebookProcess.objects.create(
+        name="test-rulebook-process",
+        activation=activation,
+        organization=default_organization,
+    )
+
+
+@pytest.fixture
+def historical_rulebook_process_logs(
+    dummy_rulebook_process: models.RulebookProcess,
+) -> list[models.RulebookProcessLog]:
+    """Return a list of rulebook process logs."""
+    # Get the historical model
+    historical_rulebook_process_log_model = get_historical_model(
+        "core",
+        "RulebookProcessLog",
+        "0055_activation_created_by_activation_modified_by_and_more",
+    )
+
+    return historical_rulebook_process_log_model.objects.bulk_create(
+        historical_rulebook_process_log_model(
+            log=data["msg"],
+            activation_instance_id=dummy_rulebook_process.id,
+        )
+        for data in TEST_DATA
+    )
+
+
+@pytest.fixture(autouse=True)
+def reset_migrations():
+    call_command("migrate", "core", "0055")
+    yield
+    call_command("migrate")
+
+
+@pytest.mark.django_db(transaction=True)
+def test_migration(historical_rulebook_process_logs):
+    for log in historical_rulebook_process_logs:
+        assert not hasattr(log, "log_created_at")
+
+    call_command("migrate", "core", "0056")
+
+    for i, log in enumerate(models.RulebookProcessLog.objects.all()):
+        log.refresh_from_db()
+        assert log.log_created_at == TEST_DATA[i]["dt"]
+        assert log.log == TEST_DATA[i]["log"]
+
+    # Test the backward migration
+    call_command("migrate", "core", "0055")
+
+    historical_model = get_historical_model(
+        "core",
+        "RulebookProcessLog",
+        "0055_activation_created_by_activation_modified_by_and_more",
+    )
+
+    for i, log in enumerate(historical_model.objects.all()):
+        assert not hasattr(log, "log_created_at")
+        assert (
+            TEST_DATA[i]["dt"].strftime("%Y-%m-%d %H:%M:%S,%f").rstrip("0")
+            in log.log
+        )
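
Note (not part of the patch): a minimal sketch for sanity-checking the new
parser interactively. It assumes a configured Django settings module, since
make_aware() relies on Django's timezone configuration; the
"aap_eda.settings.default" module path below is a placeholder, not a
confirmed name.

    import os

    # Placeholder settings path; point this at your actual settings module.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aap_eda.settings.default")

    import django

    django.setup()

    from aap_eda.core.utils.rulebook_process_logs import (
        extract_datetime_and_message_from_log_entry,
    )

    samples = [
        "2025-01-17 18:39:49,191 Starting Container",
        "** 2025-01-17 18:39:55.222773 [debug] ****",
        "2025-01-17 18:43:38 638 [main] DEBUG org.drools.ansible",
        "Ruleset: Long Running Range",  # no timestamp: dt is None, log unchanged
    ]

    for entry in samples:
        dt, message = extract_datetime_and_message_from_log_entry(entry)
        print(dt, "|", message)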