From ca614894f230b548f4bcfe6293ec54754287d527 Mon Sep 17 00:00:00 2001
From: Hui Song
Date: Fri, 17 Jan 2025 13:32:53 -0500
Subject: [PATCH] update log time to remove timestamp

---
 ...ebookprocesslog_log_created_at_and_more.py |  98 +++++++++
 src/aap_eda/core/models/rulebook_process.py   |   1 +
 .../core/utils/rulebook_process_logs.py       |  73 +++++++
 .../services/activation/db_log_handler.py     |  10 +-
 .../api/test_activation_instance.py           |   1 +
 .../migrations/test_0056_parse_log_entries.py | 194 ++++++++++++++++++
 .../migrations/test_update_credential_type.py |   0
 .../core/test_rulebook_process_logs.py        |  74 +++++++
 .../activation/engine/test_kubernetes.py      |   2 +-
 .../services/activation/engine/test_podman.py |   3 +-
 10 files changed, 449 insertions(+), 7 deletions(-)
 create mode 100644 src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py
 create mode 100644 src/aap_eda/core/utils/rulebook_process_logs.py
 create mode 100644 tests/integration/core/migrations/test_0056_parse_log_entries.py
 rename tests/{unit => integration/core}/migrations/test_update_credential_type.py (100%)
 create mode 100644 tests/integration/core/test_rulebook_process_logs.py

diff --git a/src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py b/src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py
new file mode 100644
index 000000000..c469a4904
--- /dev/null
+++ b/src/aap_eda/core/migrations/0056_rulebookprocesslog_log_created_at_and_more.py
@@ -0,0 +1,98 @@
+# Generated by Django 4.2.16 on 2025-01-23 05:05
+
+import logging
+
+from django.db import migrations, models
+
+from aap_eda.core.utils.rulebook_process_logs import (
+    extract_datetime_and_message_from_log_entry,
+)
+
+logger = logging.getLogger(__name__)
+
+CHUNK_SIZE = 1000
+
+
+def up_log_entries(apps, schema_editor):
+    rulebook_process_log_model = apps.get_model("core", "RulebookProcessLog")
+
+    updated_entries = []
+    total_records = 0
+
+    for entry in rulebook_process_log_model.objects.iterator(
+        chunk_size=CHUNK_SIZE
+    ):
+        dt, message = extract_datetime_and_message_from_log_entry(entry.log)
+        entry.log_created_at = dt
+        entry.log = message
+        updated_entries.append(entry)
+
+        # flush in CHUNK_SIZE batches to bound memory use on large tables
+        if len(updated_entries) >= CHUNK_SIZE:
+            rulebook_process_log_model.objects.bulk_update(
+                updated_entries, ["log", "log_created_at"]
+            )
+            total_records += len(updated_entries)
+            logger.info(f"Parsed {total_records} entries.")
+            updated_entries.clear()
+
+    # update the last chunk
+    if updated_entries:
+        rulebook_process_log_model.objects.bulk_update(
+            updated_entries, ["log", "log_created_at"]
+        )
+        total_records += len(updated_entries)
+
+    logger.info(f"Parsed {total_records} entries in total.")
+
+
+def down_log_entries(apps, schema_editor):
+    rulebook_process_log_model = apps.get_model("core", "RulebookProcessLog")
+
+    updated_entries = []
+    total_records = 0
+
+    for entry in rulebook_process_log_model.objects.iterator(
+        chunk_size=CHUNK_SIZE
+    ):
+        if entry.log_created_at:
+            timestamp_str = entry.log_created_at.strftime(
+                "%Y-%m-%d %H:%M:%S,%f"
+            ).rstrip("0")
+            message = f"{timestamp_str} {entry.log}"
+            entry.log = message
+
+        updated_entries.append(entry)
+
+        # flush in CHUNK_SIZE batches to bound memory use on large tables
+        if len(updated_entries) >= CHUNK_SIZE:
+            rulebook_process_log_model.objects.bulk_update(
+                updated_entries, ["log"]
+            )
+            total_records += len(updated_entries)
+            logger.info(f"Restored {total_records} entries.")
+            updated_entries = []
+
+    # update the last chunk
+    if updated_entries:
+        rulebook_process_log_model.objects.bulk_update(
+            updated_entries, ["log"]
+        )
+        total_records += len(updated_entries)
+
+    logger.info(f"Restored {total_records} entries in total.")
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("core", "0055_activation_created_by_activation_modified_by_and_more"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="rulebookprocesslog",
+            name="log_created_at",
+            field=models.DateTimeField(null=True),
+        ),
+        migrations.RunPython(up_log_entries, down_log_entries),
+    ]
diff --git a/src/aap_eda/core/models/rulebook_process.py b/src/aap_eda/core/models/rulebook_process.py
index bcda9904e..350701c08 100644
--- a/src/aap_eda/core/models/rulebook_process.py
+++ b/src/aap_eda/core/models/rulebook_process.py
@@ -192,6 +192,7 @@ class Meta:
     )
     log = models.TextField()
     log_timestamp = models.BigIntegerField(null=False, default=0)
+    log_created_at = models.DateTimeField(null=True)
 
 
 class RulebookProcessQueue(models.Model):
diff --git a/src/aap_eda/core/utils/rulebook_process_logs.py b/src/aap_eda/core/utils/rulebook_process_logs.py
new file mode 100644
index 000000000..323137fed
--- /dev/null
+++ b/src/aap_eda/core/utils/rulebook_process_logs.py
@@ -0,0 +1,73 @@
+# Copyright 2025 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+from datetime import datetime
+from typing import Optional, Tuple
+
+from django.utils.timezone import make_aware
+
+TIMESTAMP_PATTERNS = re.compile(
+    r"^(\*{2} )?"
+    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})"
+    r"[ ,.](\d{1,6})"
+    r"(?: -)?\s+"  # optional " -" separator before the message
+)
+
+DATETIME_FORMATS = (
+    "%Y-%m-%d %H:%M:%S,%f",
+    "%Y-%m-%d %H:%M:%S.%f",
+    "%Y-%m-%d %H:%M:%S %f",
+)
+
+
+def extract_datetime_and_message_from_log_entry(
+    log_entry: str,
+) -> Tuple[Optional[datetime], str]:
+    """Extract timestamp and message from a log entry.
+
+    Supported formats:
+    - "2023-01-01 12:34:56,789 message"
+    - "2023-01-01 12:34:56.789 message"
+    - "2023-01-01 12:34:56 789 message"
+    - "** 2023-01-01 12:34:56.789 - message"
+
+    Args:
+        log_entry: Raw log entry string
+
+    Returns:
+        Tuple containing parsed datetime (or None) and cleaned message
+    """
+    match = TIMESTAMP_PATTERNS.match(log_entry)
+    if not match:
+        return None, log_entry.strip()
+
+    prefix, base_time, microseconds = match.groups()
+    timestamp_str = (
+        f"{base_time}{'.' if prefix else ','}{microseconds.ljust(6, '0')}"
+    )
+
+    aware_dt = None
+    for fmt in DATETIME_FORMATS:
+        try:
+            aware_dt = make_aware(datetime.strptime(timestamp_str, fmt))
+            break
+        except ValueError:
+            continue
+
+    message = log_entry[match.end() :].strip()
+    if message.startswith("- "):
+        message = message[2:]
+
+    return aware_dt, message
diff --git a/src/aap_eda/services/activation/db_log_handler.py b/src/aap_eda/services/activation/db_log_handler.py
index 44dc9787c..beef7d7d0 100644
--- a/src/aap_eda/services/activation/db_log_handler.py
+++ b/src/aap_eda/services/activation/db_log_handler.py
@@ -20,6 +20,9 @@
 from django.db import IntegrityError
 
 from aap_eda.core import models
+from aap_eda.core.utils.rulebook_process_logs import (
+    extract_datetime_and_message_from_log_entry,
+)
 from aap_eda.services.activation.engine.common import LogHandler
 from aap_eda.services.activation.engine.exceptions import (
     ContainerUpdateLogsError,
@@ -59,15 +62,14 @@ def write(
             lines = [lines]
 
         for line in lines:
-            if timestamp:
-                dt = f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S,%f')[:-3]}"
-                line = f"{dt} {line}"
+            dt, message = extract_datetime_and_message_from_log_entry(line)
 
             self.activation_instance_log_buffer.append(
                 models.RulebookProcessLog(
-                    log=line,
+                    log=message,
                     activation_instance_id=self.activation_instance_id,
                     log_timestamp=log_timestamp,
+                    log_created_at=dt,
                 )
             )
             self.line_count += 1
diff --git a/tests/integration/api/test_activation_instance.py b/tests/integration/api/test_activation_instance.py
index 028ba5f3e..07562e09a 100644
--- a/tests/integration/api/test_activation_instance.py
+++ b/tests/integration/api/test_activation_instance.py
@@ -111,6 +111,7 @@ def test_list_logs_from_activation_instance(
         "id",
         "log",
         "log_timestamp",
+        "log_created_at",
         "activation_instance",
     ]
 
diff --git a/tests/integration/core/migrations/test_0056_parse_log_entries.py b/tests/integration/core/migrations/test_0056_parse_log_entries.py
new file mode 100644
index 000000000..989b5caa5
--- /dev/null
+++ b/tests/integration/core/migrations/test_0056_parse_log_entries.py
@@ -0,0 +1,194 @@
+# Copyright 2025 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+from importlib import import_module
+from typing import Type
+from unittest.mock import MagicMock, patch
+
+import pytest
+from django.core.management import call_command
+from django.db import connection
+from django.db.migrations.executor import MigrationExecutor
+from django.db.models import Model
+from django.utils.timezone import make_aware
+
+from aap_eda.core import models
+
+TEST_DATA = [
+    {
+        "msg": "2025-01-17 18:39:49,191 Starting Container",
+        "dt": make_aware(datetime(2025, 1, 17, 18, 39, 49, 191000)),
+        "log": "Starting Container",
+    },
+    {
+        "msg": "** 2025-01-17 18:39:55.222773 [debug] ****",
+        "dt": make_aware(datetime(2025, 1, 17, 18, 39, 55, 222773)),
+        "log": "[debug] ****",
+    },
+    {
+        "msg": "2025-01-17 18:43:38 638 [main] DEBUG org.drools.ansible",
+        "dt": make_aware(datetime(2025, 1, 17, 18, 43, 38, 638000)),
+        "log": "[main] DEBUG org.drools.ansible",
+    },
+    {
+        "msg": "[main] DEBUG org.drools.ansible",
+        "dt": None,
+        "log": "[main] DEBUG org.drools.ansible",
+    },
+    {
+        "msg": "2025-01-17T18:43:38.222773Z [main] DEBUG org.drools.ansible",
+        "dt": None,
+        "log": "2025-01-17T18:43:38.222773Z [main] DEBUG org.drools.ansible",
+    },
+]
+
+
+def get_historical_model(
+    app_label: str, model_name: str, migration_name: str
+) -> Type[Model]:
+    """
+    Return the historical model as it existed at the specified migration.
+    """
+    executor = MigrationExecutor(connection)
+
+    # Load the project state as of the specified migration
+    state = executor.loader.project_state((app_label, migration_name))
+
+    return state.apps.get_model(app_label, model_name)
+
+
+@pytest.fixture
+def historical_rulebook_process_logs(
+    default_activation_instance: models.RulebookProcess,
+) -> list[models.RulebookProcessLog]:
+    """Return a list of rulebook process logs."""
+    # get historical model
+    historical_rulebook_process_log_model = get_historical_model(
+        "core",
+        "RulebookProcessLog",
+        "0055_activation_created_by_activation_modified_by_and_more",
+    )
+
+    return historical_rulebook_process_log_model.objects.bulk_create(
+        historical_rulebook_process_log_model(
+            log=data["msg"],
+            activation_instance_id=default_activation_instance.id,
+        )
+        for data in TEST_DATA
+    )
+
+
+@pytest.fixture
+def reset_migrations():
+    call_command("migrate", "core", "0055")
+    yield
+    call_command("migrate")
+
+
+@pytest.mark.django_db(transaction=True)
+def test_migration(historical_rulebook_process_logs, reset_migrations):
+    for log in historical_rulebook_process_logs:
+        assert not hasattr(log, "log_created_at")
+
+    call_command("migrate", "core", "0056")
+
+    for i, log in enumerate(models.RulebookProcessLog.objects.order_by("id")):
+        log.refresh_from_db()
+        assert log.log_created_at == TEST_DATA[i]["dt"]
+        assert log.log == TEST_DATA[i]["log"]
+
+
+@pytest.mark.django_db
+def test_up_log_entries_chunk_size():
+    migration_module = import_module(
+        "aap_eda.core.migrations.0056_rulebookprocesslog_log_created_at_and_more"  # noqa: E501
+    )
+
+    mock_apps = MagicMock()
+    mock_model = MagicMock()
+    mock_apps.get_model.return_value = mock_model
+
+    chunk_size = migration_module.CHUNK_SIZE
+    mock_entries = [
+        MagicMock(id=i, log=f"log {i}") for i in range(chunk_size * 2 + 1)
+    ]
+    mock_model.objects.iterator.return_value = mock_entries
+
+    with patch.object(
+        migration_module,
+        "extract_datetime_and_message_from_log_entry",
+        side_effect=lambda log: ("2025-01-27 00:00:00", f"parsed {log}"),
+    ) as mock_extract:
+        with patch.object(migration_module, "logger") as mock_logger:
+            with patch.object(
+                mock_model.objects, "bulk_update"
+            ) as mock_bulk_update:
+                migration_module.up_log_entries(mock_apps, None)
+
+                assert mock_bulk_update.call_count == 3
+
+                mock_logger.info.assert_any_call(
+                    f"Parsed {chunk_size} entries."
+                )
+                mock_logger.info.assert_any_call(
+                    f"Parsed {chunk_size * 2} entries."
+                )
+                mock_logger.info.assert_any_call(
+                    f"Parsed {len(mock_entries)} entries in total."
+                )
+
+                assert mock_extract.call_count == len(mock_entries)
+
+
+@pytest.mark.django_db
+def test_down_log_entries_chunk_size():
+    migration_module = import_module(
+        "aap_eda.core.migrations.0056_rulebookprocesslog_log_created_at_and_more"  # noqa: E501
+    )
+
+    mock_apps = MagicMock()
+    mock_model = MagicMock()
+    mock_apps.get_model.return_value = mock_model
+
+    # Create mock entries exceeding CHUNK_SIZE
+    chunk_size = migration_module.CHUNK_SIZE
+    mock_entries = [
+        MagicMock(id=i, log=f"log {i}", log_created_at=MagicMock())
+        for i in range(chunk_size * 2 + 1)
+    ]
+    mock_model.objects.iterator.return_value = mock_entries
+
+    with patch.object(migration_module, "logger") as mock_logger:
+        with patch.object(
+            mock_model.objects, "bulk_update"
+        ) as mock_bulk_update:
+            migration_module.down_log_entries(mock_apps, None)
+
+            # Ensure bulk_update is called the correct number of times
+            assert mock_bulk_update.call_count == 3
+
+            # Verify the correct number of entries is passed each time
+            bulk_update_calls = mock_bulk_update.call_args_list
+            assert len(bulk_update_calls[0][0][0]) == chunk_size  # 1st chunk
+            assert len(bulk_update_calls[1][0][0]) == chunk_size  # 2nd chunk
+            assert len(bulk_update_calls[2][0][0]) == 1  # Remaining entry
+
+    mock_logger.info.assert_any_call(f"Restored {chunk_size} entries.")
+    mock_logger.info.assert_any_call(
+        f"Restored {chunk_size * 2} entries."
+    )
+    mock_logger.info.assert_any_call(
+        f"Restored {len(mock_entries)} entries in total."
+    )
diff --git a/tests/unit/migrations/test_update_credential_type.py b/tests/integration/core/migrations/test_update_credential_type.py
similarity index 100%
rename from tests/unit/migrations/test_update_credential_type.py
rename to tests/integration/core/migrations/test_update_credential_type.py
diff --git a/tests/integration/core/test_rulebook_process_logs.py b/tests/integration/core/test_rulebook_process_logs.py
new file mode 100644
index 000000000..c05945125
--- /dev/null
+++ b/tests/integration/core/test_rulebook_process_logs.py
@@ -0,0 +1,74 @@
+# Copyright 2025 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+from typing import Optional
+
+import pytest
+from django.utils.timezone import make_aware
+
+from aap_eda.core.utils.rulebook_process_logs import (
+    extract_datetime_and_message_from_log_entry,
+)
+
+
+@pytest.mark.parametrize(
+    ("log_entry", "expected_dt", "expected_msg"),
+    [
+        (
+            "2025-01-17 18:39:49,191 Starting Container",
+            make_aware(datetime(2025, 1, 17, 18, 39, 49, 191000)),
+            "Starting Container",
+        ),
+        (
+            "2025-01-17 18:39:55,215 - ansible_rulebook.action.run_job_template",  # noqa: E501
+            make_aware(datetime(2025, 1, 17, 18, 39, 55, 215000)),
+            "ansible_rulebook.action.run_job_template",
+        ),
+        (
+            "** 2025-01-17 18:39:55.222773 [debug] ****",
+            make_aware(datetime(2025, 1, 17, 18, 39, 55, 222773)),
+            "[debug] ****",
+        ),
+        (
+            "2025-01-17 18:43:38 638 [main] DEBUG org.drools.ansible",
+            make_aware(datetime(2025, 1, 17, 18, 43, 38, 638000)),
+            "[main] DEBUG org.drools.ansible",
+        ),
+        (
+            "2025-01-17 18:39:55,215 - message with ending spaces ",
+            make_aware(datetime(2025, 1, 17, 18, 39, 55, 215000)),
+            "message with ending spaces",
+        ),
+        (
+            "** 2025-01-17 18:39:55.222773 - [test] message",
+            make_aware(datetime(2025, 1, 17, 18, 39, 55, 222773)),
+            "[test] message",
+        ),
+        (
+            "Ruleset: Long Running Range",
+            None,
+            "Ruleset: Long Running Range",
+        ),
+    ],
+)
+def test_extract_datetimes(
+    log_entry: str,
+    expected_dt: Optional[datetime],
+    expected_msg: str,
+):
+    dt, message = extract_datetime_and_message_from_log_entry(log_entry)
+
+    assert dt == expected_dt
+    assert message == expected_msg
diff --git a/tests/integration/services/activation/engine/test_kubernetes.py b/tests/integration/services/activation/engine/test_kubernetes.py
index 90f7bb1f5..2076d6dac 100644
--- a/tests/integration/services/activation/engine/test_kubernetes.py
+++ b/tests/integration/services/activation/engine/test_kubernetes.py
@@ -612,7 +612,7 @@ def test_update_logs(init_kubernetes_data, kubernetes_engine):
     ):
         pod_mock.status.container_statuses = get_pod_statuses("running")
         log_mock = mock.Mock()
-        message = "2023-10-30 19:18:48,375 INFO Result is kept for 500 seconds"
+        message = "INFO Result is kept for 500 seconds"
         with mock.patch.object(engine.client, "core_api") as core_api_mock:
             core_api_mock.read_namespaced_pod_log.return_value = log_mock
             log_mock.splitlines.return_value = [
diff --git a/tests/integration/services/activation/engine/test_podman.py b/tests/integration/services/activation/engine/test_podman.py
index f7a8dcfa6..f8f85a6c8 100644
--- a/tests/integration/services/activation/engine/test_podman.py
+++ b/tests/integration/services/activation/engine/test_podman.py
@@ -645,8 +645,7 @@ def test_engine_update_logs(init_podman_data, podman_engine):
     log_handler = DBLogger(init_podman_data.activation_instance.id)
    init_log_read_at = init_podman_data.activation_instance.log_read_at
     message = (
-        "2023-10-31 15:28:01,318 - ansible_rulebook.engine - INFO - "
-        "Calling main in ansible.eda.range"
+        "ansible_rulebook.engine - INFO - Calling main in ansible.eda.range"
     )
 
     container_mock = mock.Mock()
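
For reference, a minimal usage sketch of the new parser helper, outside the patch itself. It assumes a configured Django settings module, since make_aware() reads the project's USE_TZ/TIME_ZONE; the settings module name below is a placeholder, not a value taken from this patch.

    # Sketch: exercising extract_datetime_and_message_from_log_entry on the
    # log formats this patch claims to support. The DJANGO_SETTINGS_MODULE
    # value is an assumption for illustration only.
    import os

    import django

    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aap_eda.settings")
    django.setup()

    from aap_eda.core.utils.rulebook_process_logs import (
        extract_datetime_and_message_from_log_entry,
    )

    samples = [
        "2025-01-17 18:39:49,191 Starting Container",      # logging-style comma
        "** 2025-01-17 18:39:55.222773 [debug] ****",      # ansible-rulebook banner
        "2025-01-17 18:43:38 638 [main] DEBUG org.drools", # drools space separator
        "Ruleset: Long Running Range",                     # no leading timestamp
    ]

    for line in samples:
        dt, message = extract_datetime_and_message_from_log_entry(line)
        print(repr(dt), "|", message)

    # The first three lines yield a timezone-aware datetime plus the bare
    # message; the last yields (None, <line unchanged>), which is what lets
    # down_log_entries skip entries that never carried a timestamp.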