diff --git a/CHANGELOG.md b/CHANGELOG.md index 21d998c61d..fd7332e3e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,9 @@ # Change Log -## v1.0.8 (2022-07-21) +## v1.0.9 (2022-07-21) - Frontend bug fixes & improvements - Support regex_replace() in templates +- Bring back alert group caching and list view ## v1.0.7 (2022-07-18) - Backend & frontend bug fixes diff --git a/engine/apps/alerts/incident_appearance/renderers/base_renderer.py b/engine/apps/alerts/incident_appearance/renderers/base_renderer.py index f18fd6a351..234c80383a 100644 --- a/engine/apps/alerts/incident_appearance/renderers/base_renderer.py +++ b/engine/apps/alerts/incident_appearance/renderers/base_renderer.py @@ -18,12 +18,9 @@ def templater_class(self): class AlertGroupBaseRenderer(ABC): - def __init__(self, alert_group, alert=None): - if alert is None: - alert = alert_group.alerts.first() - + def __init__(self, alert_group): self.alert_group = alert_group - self.alert_renderer = self.alert_renderer_class(alert) + self.alert_renderer = self.alert_renderer_class(self.alert_group.alerts.first()) @property @abstractmethod diff --git a/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py b/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py index aa7a059ef4..9759e86ba4 100644 --- a/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py +++ b/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py @@ -20,11 +20,11 @@ def render(self): class AlertGroupClassicMarkdownRenderer(AlertGroupBaseRenderer): - def __init__(self, alert_group, alert=None): - if alert is None: - alert = alert_group.alerts.last() + def __init__(self, alert_group): + super().__init__(alert_group) - super().__init__(alert_group, alert) + # use the last alert to render content + self.alert_renderer = self.alert_renderer_class(self.alert_group.alerts.last()) @property def alert_renderer_class(self): diff --git a/engine/apps/alerts/incident_appearance/renderers/web_renderer.py b/engine/apps/alerts/incident_appearance/renderers/web_renderer.py index 681f94f551..e68d453cd0 100644 --- a/engine/apps/alerts/incident_appearance/renderers/web_renderer.py +++ b/engine/apps/alerts/incident_appearance/renderers/web_renderer.py @@ -20,11 +20,11 @@ def render(self): class AlertGroupWebRenderer(AlertGroupBaseRenderer): - def __init__(self, alert_group, alert=None): - if alert is None: - alert = alert_group.alerts.last() + def __init__(self, alert_group): + super().__init__(alert_group) - super().__init__(alert_group, alert) + # use the last alert to render content + self.alert_renderer = self.alert_renderer_class(self.alert_group.alerts.last()) @property def alert_renderer_class(self): diff --git a/engine/apps/alerts/models/alert.py b/engine/apps/alerts/models/alert.py index e5bd504d47..3e08e7b911 100644 --- a/engine/apps/alerts/models/alert.py +++ b/engine/apps/alerts/models/alert.py @@ -5,7 +5,7 @@ from django.apps import apps from django.conf import settings from django.core.validators import MinLengthValidator -from django.db import models +from django.db import models, transaction from django.db.models import JSONField from django.db.models.signals import post_save @@ -261,6 +261,9 @@ def listen_for_alert_model_save(sender, instance, created, *args, **kwargs): else: distribute_alert.apply_async((instance.pk,), countdown=TASK_DELAY_SECONDS) + logger.info(f"Recalculate AG cache. 
Reason: save alert model {instance.pk}") + transaction.on_commit(instance.group.schedule_cache_for_web) + # Connect signal to base Alert class post_save.connect(listen_for_alert_model_save, Alert) diff --git a/engine/apps/alerts/models/alert_group.py b/engine/apps/alerts/models/alert_group.py index 7e3e313717..16b2d19bcf 100644 --- a/engine/apps/alerts/models/alert_group.py +++ b/engine/apps/alerts/models/alert_group.py @@ -8,9 +8,12 @@ from celery import uuid as celery_uuid from django.apps import apps from django.conf import settings +from django.core.cache import cache from django.core.validators import MinLengthValidator -from django.db import IntegrityError, models +from django.db import IntegrityError, models, transaction from django.db.models import JSONField, Q, QuerySet +from django.db.models.signals import post_save +from django.dispatch import receiver from django.utils import timezone from django.utils.functional import cached_property @@ -19,9 +22,16 @@ from apps.alerts.incident_appearance.renderers.slack_renderer import AlertGroupSlackRenderer from apps.alerts.incident_log_builder import IncidentLogBuilder from apps.alerts.signals import alert_group_action_triggered_signal -from apps.alerts.tasks import acknowledge_reminder_task, call_ack_url, send_alert_group_signal, unsilence_task +from apps.alerts.tasks import ( + acknowledge_reminder_task, + call_ack_url, + schedule_cache_for_alert_group, + send_alert_group_signal, + unsilence_task, +) from apps.slack.slack_formatter import SlackFormatter from apps.user_management.models import User +from common.mixins.use_random_readonly_db_manager_mixin import UseRandomReadonlyDbManagerMixin from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length from common.utils import clean_markup, str_or_backup @@ -98,6 +108,10 @@ def filter(self, *args, **kwargs): return super().filter(*args, **kwargs, is_archived=False) +class AlertGroupManager(UseRandomReadonlyDbManagerMixin, models.Manager): + pass + + class AlertGroupSlackRenderingMixin: """ Ideally this mixin should not exist. Instead of this instance of AlertGroupSlackRenderer should be created and used @@ -120,8 +134,8 @@ def slack_templated_first_alert(self): class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.Model): - all_objects = AlertGroupQuerySet.as_manager() - unarchived_objects = UnarchivedAlertGroupQuerySet.as_manager() + all_objects = AlertGroupManager.from_queryset(AlertGroupQuerySet)() + unarchived_objects = AlertGroupManager.from_queryset(UnarchivedAlertGroupQuerySet)() ( NEW, @@ -228,6 +242,8 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. 
active_escalation_id = models.CharField(max_length=100, null=True, default=None) # ID generated by celery active_resolve_calculation_id = models.CharField(max_length=100, null=True, default=None) # ID generated by celery + # ID generated by celery + active_cache_for_web_calculation_id = models.CharField(max_length=100, null=True, default=None) SILENCE_DELAY_OPTIONS = ( (1800, "30 minutes"), @@ -299,9 +315,7 @@ def status(self): related_name="dependent_alert_groups", ) - # cached_render_for_web and active_cache_for_web_calculation_id are deprecated - cached_render_for_web = models.JSONField(default=dict) - active_cache_for_web_calculation_id = models.CharField(max_length=100, null=True, default=None) + cached_render_for_web = JSONField(default=dict) last_unique_unacknowledge_process_id = models.CharField(max_length=100, null=True, default=None) is_archived = models.BooleanField(default=False) @@ -390,6 +404,18 @@ def skip_escalation_in_slack(self): def is_alert_a_resolve_signal(self, alert): raise NotImplementedError + def cache_for_web(self, organization): + from apps.api.serializers.alert_group import AlertGroupSerializer + + # Re-take object to switch connection from readonly db to master. + _self = AlertGroup.all_objects.get(pk=self.pk) + _self.cached_render_for_web = AlertGroupSerializer(self, context={"organization": organization}).data + self.cached_render_for_web = _self.cached_render_for_web + _self.save(update_fields=["cached_render_for_web"]) + + def schedule_cache_for_web(self): + schedule_cache_for_alert_group.apply_async((self.pk,)) + @property def permalink(self): if self.slack_message is not None: @@ -399,6 +425,10 @@ def permalink(self): def web_link(self): return urljoin(self.channel.organization.web_link, f"?page=incident&id={self.public_primary_key}") + @property + def alerts_count(self): + return self.alerts.count() + @property def happened_while_maintenance(self): return self.root_alert_group is not None and self.root_alert_group.maintenance_uuid is not None @@ -419,6 +449,10 @@ def acknowledge_by_user(self, user: User, action_source: Optional[str] = None) - self.unresolve() self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user, reason="Acknowledge button") + # clear resolve report cache + cache_key = "render_after_resolve_report_json_{}".format(self.pk) + cache.delete(cache_key) + self.acknowledge(acknowledged_by_user=user, acknowledged_by=AlertGroup.USER) self.stop_escalation() if self.is_root_alert_group: @@ -639,6 +673,9 @@ def un_resolve_by_user(self, user: User, action_source: Optional[str] = None) -> self.unresolve() log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user) + # clear resolve report cache + self.drop_cached_after_resolve_report_json() + if self.is_root_alert_group: self.start_escalation_if_needed() @@ -811,6 +848,10 @@ def silence_by_user(self, user: User, silence_delay: Optional[int], action_sourc self.unresolve() self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user, reason="Silence button") + # clear resolve report cache + cache_key = "render_after_resolve_report_json_{}".format(self.pk) + cache.delete(cache_key) + if self.acknowledged: self.unacknowledge() self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_ACK, author=user, reason="Silence button") @@ -1019,6 +1060,8 @@ def bulk_acknowledge(user: User, alert_groups: "QuerySet[AlertGroup]") -> None: author=user, reason="Bulk action acknowledge", ) + # clear resolve report cache + 
alert_group.drop_cached_after_resolve_report_json()
 
         for alert_group in alert_groups_to_unsilence_before_acknowledge_list:
             alert_group.log_records.create(
@@ -1151,6 +1194,8 @@ def bulk_restart(user: User, alert_groups: "QuerySet[AlertGroup]") -> None:
                 reason="Bulk action restart",
             )
 
+            alert_group.drop_cached_after_resolve_report_json()
+
             if alert_group.is_root_alert_group:
                 alert_group.start_escalation_if_needed()
 
@@ -1248,6 +1293,7 @@ def bulk_silence(user: User, alert_groups: "QuerySet[AlertGroup]", silence_delay
                 author=user,
                 reason="Bulk action silence",
             )
+            alert_group.drop_cached_after_resolve_report_json()
 
         for alert_group in alert_groups_to_unsilence_before_silence_list:
             alert_group.log_records.create(
@@ -1437,7 +1483,7 @@ def get_acknowledge_text(self, mention_user=False):
         else:
             return "Acknowledged"
 
-    def render_after_resolve_report_json(self):
+    def non_cached_after_resolve_report_json(self):
         AlertGroupLogRecord = apps.get_model("alerts", "AlertGroupLogRecord")
         UserNotificationPolicyLogRecord = apps.get_model("base", "UserNotificationPolicyLogRecord")
         ResolutionNote = apps.get_model("alerts", "ResolutionNote")
@@ -1455,6 +1501,21 @@ def render_after_resolve_report_json(self):
             result_log_report.append(log_record.render_log_line_json())
         return result_log_report
 
+    def render_after_resolve_report_json(self):
+        cache_key = "render_after_resolve_report_json_{}".format(self.pk)
+
+        # cache.get_or_set can return None in some cases, so use the get and set cache methods separately
+        log_report = cache.get(cache_key)
+        if log_report is None:
+            log_report = self.non_cached_after_resolve_report_json()
+            cache.set(cache_key, log_report)
+        return log_report
+
+    def drop_cached_after_resolve_report_json(self):
+        cache_key = "render_after_resolve_report_json_{}".format(self.pk)
+        if cache_key in cache:
+            cache.delete(cache_key)
+
     @property
     def has_resolution_notes(self):
         return self.resolution_notes.exists()
@@ -1534,3 +1595,16 @@ def last_stop_escalation_log(self):
         )
 
         return stop_escalation_log
+
+
+@receiver(post_save, sender=AlertGroup)
+def listen_for_alert_group_model_save(sender, instance, created, *args, **kwargs):
+    # Re-cache on any save except the one that wrote the cache field itself;
+    # reacting to that save would schedule re-caching in an endless loop.
+    if (
+        kwargs is not None
+        and "update_fields" in kwargs
+        and kwargs["update_fields"] is not None
+        and "cached_render_for_web" not in kwargs["update_fields"]
+    ):
+        transaction.on_commit(instance.schedule_cache_for_web)
diff --git a/engine/apps/alerts/models/alert_group_log_record.py b/engine/apps/alerts/models/alert_group_log_record.py
index c2bacc7d0b..7e5e30c938 100644
--- a/engine/apps/alerts/models/alert_group_log_record.py
+++ b/engine/apps/alerts/models/alert_group_log_record.py
@@ -3,7 +3,7 @@
 import humanize
 from django.apps import apps
-from django.db import models
+from django.db import models, transaction
 from django.db.models import JSONField
 from django.db.models.signals import post_save
 from django.dispatch import receiver
@@ -546,6 +546,7 @@ def get_step_specific_info(self):
 
 @receiver(post_save, sender=AlertGroupLogRecord)
 def listen_for_alertgrouplogrecord(sender, instance, created, *args, **kwargs):
+    instance.alert_group.drop_cached_after_resolve_report_json()
     if instance.type != AlertGroupLogRecord.TYPE_DELETED:
         if not instance.alert_group.is_maintenance_incident:
             alert_group_pk = instance.alert_group.pk
@@ -554,3 +555,6 @@ def listen_for_alertgrouplogrecord(sender, instance, created, *args, **kwargs):
             f"alert group event: {instance.get_type_display()}"
         )
         send_update_log_report_signal.apply_async(kwargs={"alert_group_pk": alert_group_pk}, countdown=8)
+
+    logger.info(f"Recalculate AG cache. Reason: save alert_group_log_record model {instance.pk}")
+    transaction.on_commit(instance.alert_group.schedule_cache_for_web)
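The two post_save receivers above share a pattern worth spelling out: re-cache only after the surrounding transaction commits, and never in response to the save that wrote the cache field itself. A minimal standalone sketch of that guard, reusing the model and method names from the hunks above (illustrative only, not part of the diff):

    from django.db import transaction
    from django.db.models.signals import post_save
    from django.dispatch import receiver

    from apps.alerts.models import AlertGroup

    @receiver(post_save, sender=AlertGroup)
    def recache_on_save(sender, instance, created, **kwargs):
        update_fields = kwargs.get("update_fields")
        # cache_for_web() saves with update_fields=["cached_render_for_web"];
        # reacting to that save would schedule another re-cache, forever.
        if update_fields is not None and "cached_render_for_web" in update_fields:
            return
        # Defer until the transaction commits so the Celery worker sees the new rows.
        transaction.on_commit(instance.schedule_cache_for_web)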
diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py
index 643f737e9f..308686c020 100644
--- a/engine/apps/alerts/models/alert_receive_channel.py
+++ b/engine/apps/alerts/models/alert_receive_channel.py
@@ -19,7 +19,11 @@
 from apps.alerts.grafana_alerting_sync_manager.grafana_alerting_sync import GrafanaAlertingSyncManager
 from apps.alerts.integration_options_mixin import IntegrationOptionsMixin
 from apps.alerts.models.maintainable_object import MaintainableObject
-from apps.alerts.tasks import disable_maintenance, sync_grafana_alerting_contact_points
+from apps.alerts.tasks import (
+    disable_maintenance,
+    invalidate_web_cache_for_alert_group,
+    sync_grafana_alerting_contact_points,
+)
 from apps.base.messaging import get_messaging_backend_from_id
 from apps.base.utils import live_settings
 from apps.integrations.metadata import heartbeat
@@ -689,6 +693,16 @@ def listen_for_alertreceivechannel_model_save(sender, instance, created, *args,
         create_organization_log(
             instance.organization, None, OrganizationLogType.TYPE_HEARTBEAT_CREATED, description
         )
+    else:
+        logger.info(f"Drop AG cache. Reason: save alert_receive_channel {instance.pk}")
+        if kwargs is not None:
+            if "update_fields" in kwargs:
+                if kwargs["update_fields"] is not None:
+                    # Hack to avoid invalidating the web cache on AlertReceiveChannel.start_send_rate_limit_message_task
+                    if "rate_limit_message_task_id" in kwargs["update_fields"]:
+                        return
+
+        invalidate_web_cache_for_alert_group.apply_async(kwargs={"channel_pk": instance.pk})
 
     if instance.integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING:
         if created:
diff --git a/engine/apps/alerts/tasks/__init__.py b/engine/apps/alerts/tasks/__init__.py
index 79b8b0ed72..3ff8501ef5 100644
--- a/engine/apps/alerts/tasks/__init__.py
+++ b/engine/apps/alerts/tasks/__init__.py
@@ -9,7 +9,7 @@
 from .delete_alert_group import delete_alert_group  # noqa: F401
 from .distribute_alert import distribute_alert  # noqa: F401
 from .escalate_alert_group import escalate_alert_group  # noqa: F401
-from .invalidate_web_cache_for_alert_group import invalidate_web_cache_for_alert_group  # noqa: F401, todo: remove
+from .invalidate_web_cache_for_alert_group import invalidate_web_cache_for_alert_group  # noqa: F401
 from .invite_user_to_join_incident import invite_user_to_join_incident  # noqa: F401
 from .maintenance import disable_maintenance  # noqa: F401
 from .notify_all import notify_all_task  # noqa: F401
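The tasks restored below debounce re-caching through a task id stored in the cache: every schedule overwrites the id, so of several queued tasks only the most recently scheduled one actually refreshes the cache. A standalone toy model of that last-writer-wins check (a plain dict stands in for Django's cache and UUIDs for Celery task ids; illustrative only):

    import uuid

    fake_cache = {}

    def schedule(alert_group_pk):
        task_id = str(uuid.uuid4())  # stands in for the id returned by apply_async()
        fake_cache[f"cache_alert_group_for_web_{alert_group_pk}"] = task_id
        return task_id

    def run(alert_group_pk, my_task_id):
        latest = fake_cache.get(f"cache_alert_group_for_web_{alert_group_pk}")
        if latest != my_task_id:
            return "skipped"  # superseded by a newer schedule, or the key expired
        return "recached"

    first = schedule(42)
    second = schedule(42)  # overwrites the cached id before `first` runs
    assert run(42, first) == "skipped"
    assert run(42, second) == "recached"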
diff --git a/engine/apps/alerts/tasks/cache_alert_group_for_web.py b/engine/apps/alerts/tasks/cache_alert_group_for_web.py
index 5f0c52d50c..677e0a1973 100644
--- a/engine/apps/alerts/tasks/cache_alert_group_for_web.py
+++ b/engine/apps/alerts/tasks/cache_alert_group_for_web.py
@@ -1,19 +1,54 @@
+from celery.utils.log import get_task_logger
+from django.apps import apps
 from django.conf import settings
+from django.core.cache import cache
 
 from common.custom_celery_tasks import shared_dedicated_queue_retry_task
 
+logger = get_task_logger(__name__)
+
+
+def get_cache_key_caching_alert_group_for_web(alert_group_pk):
+    CACHE_KEY_PREFIX = "cache_alert_group_for_web"
+    return f"{CACHE_KEY_PREFIX}_{alert_group_pk}"
+
 
 @shared_dedicated_queue_retry_task(
     autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None
 )
 def schedule_cache_for_alert_group(alert_group_pk):
-    # todo: remove
-    pass
+    CACHE_FOR_ALERT_GROUP_LIFETIME = 60
+    START_CACHE_DELAY = 5  # we introduce a delay to avoid re-caching after every single alert
+
+    task = cache_alert_group_for_web.apply_async(args=[alert_group_pk], countdown=START_CACHE_DELAY)
+    cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk)
+    cache.set(cache_key, task.id, timeout=CACHE_FOR_ALERT_GROUP_LIFETIME)
 
 
 @shared_dedicated_queue_retry_task(
     autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None
 )
 def cache_alert_group_for_web(alert_group_pk):
-    # todo: remove
-    pass
+    """
+    Async task to re-cache alert_group for web.
+    """
+    cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk)
+    cached_task_id = cache.get(cache_key)
+    current_task_id = cache_alert_group_for_web.request.id
+
+    if cached_task_id is None:
+        return (
+            f"cache_alert_group_for_web skipped, because no cached task_id exists for"
+            f" alert_group {alert_group_pk} (current task_id: {current_task_id}), which means this task is no"
+            f" longer relevant: the cache was dropped by an engine restart or by CACHE_FOR_ALERT_GROUP_LIFETIME"
+        )
+    if current_task_id != cached_task_id:
+        return (
+            f"cache_alert_group_for_web skipped, because the current task_id ({current_task_id})"
+            f" doesn't match the cached task_id ({cached_task_id}) for alert_group {alert_group_pk}"
+        )
+    else:
+        AlertGroup = apps.get_model("alerts", "AlertGroup")
+        alert_group = AlertGroup.all_objects.using_readonly_db.get(pk=alert_group_pk)
+        alert_group.cache_for_web(alert_group.channel.organization)
+        logger.info(f"cache_alert_group_for_web: cache refreshed for alert_group {alert_group_pk}")
diff --git a/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py b/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py
index 9c8786d9af..d9c7c4f988 100644
--- a/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py
+++ b/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py
@@ -1,11 +1,32 @@
+from django.apps import apps
 from django.conf import settings
 
 from common.custom_celery_tasks import shared_dedicated_queue_retry_task
 
+from .task_logger import task_logger
+
 
 @shared_dedicated_queue_retry_task(
     autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
 )
 def invalidate_web_cache_for_alert_group(org_pk=None, channel_pk=None, alert_group_pk=None, alert_group_pks=None):
-    # todo: remove
-    pass
+    AlertGroup = apps.get_model("alerts", "AlertGroup")
+    DynamicSetting = apps.get_model("base", "DynamicSetting")
+
+    if channel_pk:
+        task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - alert_receive_channel {channel_pk}")
+        q = AlertGroup.all_objects.filter(channel__pk=channel_pk)
+    elif org_pk:
+        task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - organization {org_pk}")
+        q = AlertGroup.all_objects.filter(channel__organization__pk=org_pk)
+    elif alert_group_pk:
+        task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - alert_group {alert_group_pk}")
+        q = AlertGroup.all_objects.filter(pk=alert_group_pk)
+    elif alert_group_pks:
+        task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - alert_groups {alert_group_pks}")
+        q = AlertGroup.all_objects.filter(pk__in=alert_group_pks)
+
+    skip_task = DynamicSetting.objects.get_or_create(name="skip_invalidate_web_cache_for_alert_group")[0]
+    if skip_task.boolean_value:
+        return "Task has been skipped because of skip_invalidate_web_cache_for_alert_group DynamicSetting"
+
q.update(cached_render_for_web={}) diff --git a/engine/apps/api/serializers/alert_group.py b/engine/apps/api/serializers/alert_group.py index 340646e761..a9e5f9f273 100644 --- a/engine/apps/api/serializers/alert_group.py +++ b/engine/apps/api/serializers/alert_group.py @@ -1,5 +1,7 @@ import logging +from datetime import datetime +import humanize from rest_framework import serializers from apps.alerts.incident_appearance.renderers.classic_markdown_renderer import AlertGroupClassicMarkdownRenderer @@ -27,31 +29,51 @@ def get_render_for_web(self, obj): return AlertGroupWebRenderer(obj).render() -class AlertGroupListSerializer(EagerLoadingMixin, serializers.ModelSerializer): +class AlertGroupSerializer(EagerLoadingMixin, serializers.ModelSerializer): + """ + Attention: It's heavily cached. Make sure to invalidate alertgroup's web cache if you update the format! + """ + pk = serializers.CharField(read_only=True, source="public_primary_key") alert_receive_channel = FastAlertReceiveChannelSerializer(source="channel") - status = serializers.ReadOnlyField() + alerts = serializers.SerializerMethodField("get_limited_alerts") + resolved_by_verbose = serializers.CharField(source="get_resolved_by_display") resolved_by_user = FastUserSerializer(required=False) acknowledged_by_user = FastUserSerializer(required=False) silenced_by_user = FastUserSerializer(required=False) related_users = serializers.SerializerMethodField() + + last_alert_at = serializers.SerializerMethodField() + + started_at_verbose = serializers.SerializerMethodField() + acknowledged_at_verbose = serializers.SerializerMethodField() + resolved_at_verbose = serializers.SerializerMethodField() + silenced_at_verbose = serializers.SerializerMethodField() + dependent_alert_groups = ShortAlertGroupSerializer(many=True) root_alert_group = ShortAlertGroupSerializer() - alerts_count = serializers.IntegerField(read_only=True) + alerts_count = serializers.ReadOnlyField() + + status = serializers.ReadOnlyField() render_for_web = serializers.SerializerMethodField() render_for_classic_markdown = serializers.SerializerMethodField() PREFETCH_RELATED = [ + "alerts", "dependent_alert_groups", + "log_records", "log_records__author", + "log_records__escalation_policy", + "log_records__invitation__invitee", ] SELECT_RELATED = [ + "slack_message", "channel__organization", - "root_alert_group", - "resolved_by_user", + "slack_message___slack_team_identity", "acknowledged_by_user", + "resolved_by_user", "silenced_by_user", ] @@ -65,6 +87,7 @@ class Meta: "alert_receive_channel", "resolved", "resolved_by", + "resolved_by_verbose", "resolved_by_user", "resolved_at", "acknowledged_at", @@ -75,19 +98,45 @@ class Meta: "silenced", "silenced_by_user", "silenced_at", + "silenced_at_verbose", "silenced_until", "started_at", + "last_alert_at", "silenced_until", + "permalink", + "alerts", "related_users", + "started_at_verbose", + "acknowledged_at_verbose", + "resolved_at_verbose", "render_for_web", + "render_after_resolve_report_json", "render_for_classic_markdown", "dependent_alert_groups", "root_alert_group", "status", ] - def get_render_for_web(self, obj): - return AlertGroupWebRenderer(obj, obj.last_alert).render() + def get_last_alert_at(self, obj): + last_alert = obj.alerts.last() + # TODO: This is a Hotfix for 0.0.27 + if last_alert is None: + logger.warning(f"obj {obj} doesn't have last_alert!") + return "" + return str(last_alert.created_at) + + def get_limited_alerts(self, obj): + """ + Overriding default alerts because there are alert_groups with thousands of 
them. + It's just too slow, we need to cut here. + """ + alerts = obj.alerts.all()[:100] + + if len(alerts) > 90: + for alert in alerts: + alert.title = str(alert.title) + " Only last 100 alerts are shown. Use Amixr API to fetch all of them." + + return AlertSerializer(alerts, many=True).data def get_render_for_classic_markdown(self, obj): return AlertGroupClassicMarkdownRenderer(obj).render() @@ -116,39 +165,37 @@ def get_related_users(self, obj): users_ids.add(log_record.author.public_primary_key) return users - -class AlertGroupSerializer(AlertGroupListSerializer): - alerts = serializers.SerializerMethodField("get_limited_alerts") - last_alert_at = serializers.SerializerMethodField() - - class Meta(AlertGroupListSerializer.Meta): - fields = AlertGroupListSerializer.Meta.fields + [ - "alerts", - "render_after_resolve_report_json", - "permalink", - "last_alert_at", - ] + def get_started_at_verbose(self, obj): + started_at_verbose = None + if obj.started_at is not None: + started_at_verbose = humanize.naturaltime( + datetime.now().replace(tzinfo=None) - obj.started_at.replace(tzinfo=None) + ) + return started_at_verbose + + def get_acknowledged_at_verbose(self, obj): + acknowledged_at_verbose = None + if obj.acknowledged_at is not None: + acknowledged_at_verbose = humanize.naturaltime( + datetime.now().replace(tzinfo=None) - obj.acknowledged_at.replace(tzinfo=None) + ) # TODO: Deal with timezones + return acknowledged_at_verbose + + def get_resolved_at_verbose(self, obj): + resolved_at_verbose = None + if obj.resolved_at is not None: + resolved_at_verbose = humanize.naturaltime( + datetime.now().replace(tzinfo=None) - obj.resolved_at.replace(tzinfo=None) + ) # TODO: Deal with timezones + return resolved_at_verbose + + def get_silenced_at_verbose(self, obj): + silenced_at_verbose = None + if obj.silenced_at is not None: + silenced_at_verbose = humanize.naturaltime( + datetime.now().replace(tzinfo=None) - obj.silenced_at.replace(tzinfo=None) + ) # TODO: Deal with timezones + return silenced_at_verbose def get_render_for_web(self, obj): return AlertGroupWebRenderer(obj).render() - - def get_last_alert_at(self, obj): - last_alert = obj.alerts.last() - - if not last_alert: - return obj.started_at - - return last_alert.created_at - - def get_limited_alerts(self, obj): - """ - Overriding default alerts because there are alert_groups with thousands of them. - It's just too slow, we need to cut here. - """ - alerts = obj.alerts.all()[:100] - - if len(alerts) > 90: - for alert in alerts: - alert.title = str(alert.title) + " Only last 100 alerts are shown. Use OnCall API to fetch all of them." 
-
-        return AlertSerializer(alerts, many=True).data
diff --git a/engine/apps/api/serializers/resolution_note.py b/engine/apps/api/serializers/resolution_note.py
index 0017868557..330259e3f1 100644
--- a/engine/apps/api/serializers/resolution_note.py
+++ b/engine/apps/api/serializers/resolution_note.py
@@ -1,6 +1,7 @@
 from rest_framework import serializers
 
 from apps.alerts.models import AlertGroup, ResolutionNote
+from apps.alerts.tasks import invalidate_web_cache_for_alert_group
 from apps.api.serializers.user import FastUserSerializer
 from common.api_helpers.custom_fields import OrganizationFilteredPrimaryKeyRelatedField
 from common.api_helpers.exceptions import BadRequest
@@ -38,6 +39,9 @@ def create(self, validated_data):
         validated_data["author"] = self.context["request"].user
         validated_data["source"] = ResolutionNote.Source.WEB
         created_instance = super().create(validated_data)
+        # Invalidate the alert group cache because resolution notes are shown in the alert group's timeline
+        created_instance.alert_group.drop_cached_after_resolve_report_json()
+        invalidate_web_cache_for_alert_group(alert_group_pk=created_instance.alert_group.pk)
         return created_instance
 
     def to_representation(self, instance):
@@ -53,5 +57,8 @@ class ResolutionNoteUpdateSerializer(ResolutionNoteSerializer):
     def update(self, instance, validated_data):
         if instance.source != ResolutionNote.Source.WEB:
             raise BadRequest(detail="Cannot update message with this source type")
-
-        return super().update(instance, validated_data)
+        updated_instance = super().update(instance, validated_data)
+        # Invalidate the alert group cache because resolution notes are shown in the alert group's timeline
+        updated_instance.alert_group.drop_cached_after_resolve_report_json()
+        invalidate_web_cache_for_alert_group(alert_group_pk=updated_instance.alert_group.pk)
+        return updated_instance
diff --git a/engine/apps/api/tasks.py b/engine/apps/api/tasks.py
new file mode 100644
index 0000000000..4240178adb
--- /dev/null
+++ b/engine/apps/api/tasks.py
@@ -0,0 +1,55 @@
+from celery.utils.log import get_task_logger
+from django.apps import apps
+from django.conf import settings
+from django.core.cache import cache
+
+from common.custom_celery_tasks import shared_dedicated_queue_retry_task
+
+logger = get_task_logger(__name__)
+
+
+def get_cache_key_caching_alert_group_for_web(alert_group_pk):
+    CACHE_KEY_PREFIX = "cache_alert_group_for_web"
+    return f"{CACHE_KEY_PREFIX}_{alert_group_pk}"
+
+
+# TODO: remove these tasks after all of the already-queued ones have been processed in prod
+@shared_dedicated_queue_retry_task(
+    autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None
+)
+def schedule_cache_for_alert_group(alert_group_pk):
+    CACHE_FOR_ALERT_GROUP_LIFETIME = 60
+    START_CACHE_DELAY = 5  # we introduce a delay to avoid re-caching after every single alert
+
+    task = cache_alert_group_for_web.apply_async(args=[alert_group_pk], countdown=START_CACHE_DELAY)
+    cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk)
+    cache.set(cache_key, task.id, timeout=CACHE_FOR_ALERT_GROUP_LIFETIME)
+
+
+@shared_dedicated_queue_retry_task(
+    autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None
+)
+def cache_alert_group_for_web(alert_group_pk):
+    """
+    Async task to re-cache alert_group for web.
+    """
+    cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk)
+    cached_task_id = cache.get(cache_key)
+    current_task_id = cache_alert_group_for_web.request.id
+
+    if cached_task_id is None:
+        return (
+            f"cache_alert_group_for_web skipped, because no cached task_id exists for"
+            f" alert_group {alert_group_pk} (current task_id: {current_task_id}), which means this task is no"
+            f" longer relevant: the cache was dropped by an engine restart or by CACHE_FOR_ALERT_GROUP_LIFETIME"
+        )
+    if current_task_id != cached_task_id:
+        return (
+            f"cache_alert_group_for_web skipped, because the current task_id ({current_task_id})"
+            f" doesn't match the cached task_id ({cached_task_id}) for alert_group {alert_group_pk}"
+        )
+    else:
+        AlertGroup = apps.get_model("alerts", "AlertGroup")
+        alert_group = AlertGroup.all_objects.using_readonly_db.get(pk=alert_group_pk)
+        alert_group.cache_for_web(alert_group.channel.organization)
+        logger.info(f"cache_alert_group_for_web: cache refreshed for alert_group {alert_group_pk}")
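The test changes that follow replace len(response.data["results"]) with response.data["count"] because the list endpoint moves from cursor pagination back to page-number pagination. A short illustration of the payload difference (response shapes as produced by DRF's pagination classes; the values are made up):

    # PageNumberPagination reports a total count; CursorPagination omits it by design.
    page_number_response = {"count": 4, "next": None, "previous": None, "results": ["..."]}
    cursor_response = {"next": None, "previous": None, "results": ["..."]}

    assert page_number_response["count"] == 4
    assert "count" not in cursor_response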
diff --git a/engine/apps/api/tests/test_alert_group.py b/engine/apps/api/tests/test_alert_group.py
index 6d4a0b9e1d..983a22bf73 100644
--- a/engine/apps/api/tests/test_alert_group.py
+++ b/engine/apps/api/tests/test_alert_group.py
@@ -63,7 +63,7 @@ def test_get_filter_started_at(alert_group_internal_api_setup, make_user_auth_he
     )
 
     assert response.status_code == status.HTTP_200_OK
-    assert len(response.data["results"]) == 4
+    assert response.data["count"] == 4
@@ -78,7 +78,7 @@ def test_get_filter_resolved_at_alertgroup_empty_result(alert_group_internal_api
         **make_user_auth_headers(user, token),
     )
     assert response.status_code == status.HTTP_200_OK
-    assert len(response.data["results"]) == 0
+    assert response.data["count"] == 0
@@ -105,7 +105,7 @@ def test_get_filter_resolved_at(alert_group_internal_api_setup, make_user_auth_h
         **make_user_auth_headers(user, token),
     )
     assert response.status_code == status.HTTP_200_OK
-    assert len(response.data["results"]) == 1
+    assert response.data["count"] == 1
@@ -117,7 +117,7 @@ def test_status_new(alert_group_internal_api_setup, make_user_auth_headers):
     url = reverse("api-internal:alertgroup-list")
     response = client.get(url + "?status=0", format="json", **make_user_auth_headers(user, token))
     assert response.status_code == status.HTTP_200_OK
-    assert len(response.data["results"]) == 1
+    assert response.data["count"] == 1
     assert response.data["results"][0]["pk"] == new_alert_group.public_primary_key
@@ -130,7 +130,7 @@ def test_status_ack(alert_group_internal_api_setup, make_user_auth_headers):
     url = reverse("api-internal:alertgroup-list")
     response = client.get(url + "?status=1", format="json", **make_user_auth_headers(user, token))
     assert response.status_code == status.HTTP_200_OK
-    assert len(response.data["results"]) == 1
+    assert response.data["count"] == 1
     assert response.data["results"][0]["pk"] == ack_alert_group.public_primary_key
@@ -143,7 +143,7 @@ def test_status_resolved(alert_group_internal_api_setup, make_user_auth_headers)
     url = reverse("api-internal:alertgroup-list")
     response = client.get(url + "?status=2", format="json", **make_user_auth_headers(user, token))
     assert response.status_code == status.HTTP_200_OK
-    assert len(response.data["results"]) == 1
+    assert response.data["count"] == 1
     assert response.data["results"][0]["pk"] == resolved_alert_group.public_primary_key
@@ -156,7 +156,7 @@ def test_status_silenced(alert_group_internal_api_setup,
make_user_auth_headers) url = reverse("api-internal:alertgroup-list") response = client.get(url + "?status=3", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert len(response.data["results"]) == 1 + assert response.data["count"] == 1 assert response.data["results"][0]["pk"] == silenced_alert_group.public_primary_key @@ -171,7 +171,7 @@ def test_all_statuses(alert_group_internal_api_setup, make_user_auth_headers): url + "?status=0&status=1&&status=2&status=3", format="json", **make_user_auth_headers(user, token) ) assert response.status_code == status.HTTP_200_OK - assert len(response.data["results"]) == 4 + assert response.data["count"] == 4 @pytest.mark.django_db @@ -213,7 +213,7 @@ def test_get_filter_resolved_by( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 1 + assert first_response.data["count"] == 1 second_response = client.get( url + f"?resolved_by={second_user.public_primary_key}", @@ -221,7 +221,7 @@ def test_get_filter_resolved_by( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert len(second_response.data["results"]) == 0 + assert second_response.data["count"] == 0 @pytest.mark.django_db @@ -269,7 +269,7 @@ def make_resolved_by_user_alert_group(user): **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 2 + assert first_response.data["count"] == 2 @pytest.mark.django_db @@ -309,7 +309,7 @@ def test_get_filter_acknowledged_by( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 1 + assert first_response.data["count"] == 1 second_response = client.get( url + f"?acknowledged_by={second_user.public_primary_key}", @@ -317,7 +317,7 @@ def test_get_filter_acknowledged_by( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert len(second_response.data["results"]) == 0 + assert second_response.data["count"] == 0 @pytest.mark.django_db @@ -363,7 +363,7 @@ def make_acknowledged_by_user_alert_group(user): **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 2 + assert first_response.data["count"] == 2 @pytest.mark.django_db @@ -402,7 +402,7 @@ def test_get_filter_silenced_by( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 1 + assert first_response.data["count"] == 1 second_response = client.get( url + f"?silenced_by={second_user.public_primary_key}", @@ -410,7 +410,7 @@ def test_get_filter_silenced_by( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert len(second_response.data["results"]) == 0 + assert second_response.data["count"] == 0 @pytest.mark.django_db @@ -455,7 +455,7 @@ def make_silenced_by_user_alert_group(user): **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 2 + assert first_response.data["count"] == 2 @pytest.mark.django_db @@ -494,7 +494,7 @@ def test_get_filter_invitees_are( **make_user_auth_headers(first_user, token), ) assert 
first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 1 + assert first_response.data["count"] == 1 second_response = client.get( url + f"?invitees_are={second_user.public_primary_key}", @@ -502,7 +502,7 @@ def test_get_filter_invitees_are( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert len(second_response.data["results"]) == 0 + assert second_response.data["count"] == 0 @pytest.mark.django_db @@ -548,7 +548,7 @@ def make_alert_group_with_invitee(user): **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 2 + assert first_response.data["count"] == 2 @pytest.mark.django_db @@ -593,7 +593,7 @@ def test_get_filter_invitees_are_ag_with_multiple_logs( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert len(first_response.data["results"]) == 1 + assert first_response.data["count"] == 1 @pytest.mark.django_db @@ -611,11 +611,11 @@ def test_get_filter_with_resolution_note( # there are no alert groups with resolution_notes response = client.get(url + "?with_resolution_note=true", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert len(response.data["results"]) == 0 + assert response.data["count"] == 0 response = client.get(url + "?with_resolution_note=false", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert len(response.data["results"]) == 4 + assert response.data["count"] == 4 # add resolution_notes to two of four alert groups make_resolution_note(res_alert_group) @@ -623,11 +623,11 @@ def test_get_filter_with_resolution_note( response = client.get(url + "?with_resolution_note=true", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert len(response.data["results"]) == 2 + assert response.data["count"] == 2 response = client.get(url + "?with_resolution_note=false", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert len(response.data["results"]) == 2 + assert response.data["count"] == 2 @pytest.mark.django_db @@ -653,7 +653,7 @@ def test_get_filter_with_resolution_note_after_delete_resolution_note( response = client.get(url + "?with_resolution_note=true", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert len(response.data["results"]) == 1 + assert response.data["count"] == 1 @pytest.mark.django_db diff --git a/engine/apps/api/views/alert_group.py b/engine/apps/api/views/alert_group.py index 822fe9f5f0..5ea7e93b78 100644 --- a/engine/apps/api/views/alert_group.py +++ b/engine/apps/api/views/alert_group.py @@ -1,6 +1,10 @@ -from datetime import timedelta +from datetime import datetime, timedelta -from django.db.models import Count, Max, Q +from django import forms +from django.db import models +from django.db.models import CharField, Q +from django.db.models.constants import LOOKUP_SEP +from django.db.models.functions import Cast from django.utils import timezone from django_filters import rest_framework as filters from django_filters.widgets import RangeWidget @@ -11,15 +15,16 @@ from rest_framework.response import Response from apps.alerts.constants import ActionSource -from apps.alerts.models import Alert, AlertGroup, 
AlertReceiveChannel +from apps.alerts.models import AlertGroup, AlertReceiveChannel +from apps.alerts.tasks import invalidate_web_cache_for_alert_group from apps.api.permissions import MODIFY_ACTIONS, READ_ACTIONS, ActionPermission, AnyRole, IsAdminOrEditor -from apps.api.serializers.alert_group import AlertGroupListSerializer, AlertGroupSerializer +from apps.api.serializers.alert_group import AlertGroupSerializer from apps.auth_token.auth import MobileAppAuthTokenAuthentication, PluginAuthentication from apps.user_management.models import User from common.api_helpers.exceptions import BadRequest from common.api_helpers.filters import DateRangeFilterMixin, ModelFieldFilterMixin from common.api_helpers.mixins import PreviewTemplateMixin, PublicPrimaryKeyMixin -from common.api_helpers.paginators import TwentyFiveCursorPaginator +from common.api_helpers.paginators import FiftyPageSizePaginator def get_integration_queryset(request): @@ -143,6 +148,34 @@ def filter_with_resolution_note(self, queryset, name, value): return queryset +class CustomSearchFilter(SearchFilter): + def must_call_distinct(self, queryset, search_fields): + """ + Return True if 'distinct()' should be used to query the given lookups. + """ + for search_field in search_fields: + opts = queryset.model._meta + if search_field[0] in self.lookup_prefixes: + search_field = search_field[1:] + + # From https://github.com/encode/django-rest-framework/pull/6240/files#diff-01f357e474dd8fd702e4951b9227bffcR88 + # Annotated fields do not need to be distinct + if isinstance(queryset, models.QuerySet) and search_field in queryset.query.annotations: + continue + + parts = search_field.split(LOOKUP_SEP) + for part in parts: + field = opts.get_field(part) + if hasattr(field, "get_path_info"): + # This field is a relation, update opts to follow the relation + path_info = field.get_path_info() + opts = path_info[-1].to_opts + if any(path.m2m for path in path_info): + # This field is a m2m relation so we know we need to call distinct + return True + return False + + class AlertGroupView( PreviewTemplateMixin, PublicPrimaryKeyMixin, @@ -183,85 +216,90 @@ class AlertGroupView( serializer_class = AlertGroupSerializer - pagination_class = TwentyFiveCursorPaginator + pagination_class = FiftyPageSizePaginator - filter_backends = [SearchFilter, filters.DjangoFilterBackend] - # todo: add ability to search by templated title - search_fields = ["public_primary_key", "inside_organization_number"] + filter_backends = [CustomSearchFilter, filters.DjangoFilterBackend] + search_fields = ["cached_render_for_web_str"] filterset_class = AlertGroupFilter - def get_serializer_class(self): - if self.action == "list": - return AlertGroupListSerializer - - return super().get_serializer_class() - - def get_queryset(self): - # make a separate query to fetch all the integrations for current organization and team (it's faster) - alert_receive_channel_pks = AlertReceiveChannel.objects_with_deleted.filter( - organization=self.request.auth.organization, team=self.request.user.current_team - ).values_list("pk", flat=True) - alert_receive_channel_pks = list(alert_receive_channel_pks) - - # no select_related or prefetch_related is used at this point, it will be done on paginate_queryset. 
-        queryset = AlertGroup.unarchived_objects.filter(channel_id__in=alert_receive_channel_pks)
-
-        return queryset
-
-    def paginate_queryset(self, queryset):
+    def list(self, request, *args, **kwargs):
         """
-        All SQL joins (select_related and prefetch_related) will be performed AFTER pagination, so it only joins tables
-        for 25 alert groups, not the whole table.
-        """
-        alert_groups = super().paginate_queryset(queryset)
-        alert_groups = self.enrich(alert_groups)
-        return alert_groups
-
-    def get_object(self):
-        obj = super().get_object()
-        obj = self.enrich([obj])[0]
-        return obj
-
-    def enrich(self, alert_groups):
-        """
-        This method performs select_related and prefetch_related (using setup_eager_loading) as well as in-memory joins
-        to add additional info like alert_count and last_alert for every alert group efficiently.
-        We need the last_alert because it's used by AlertGroupWebRenderer.
+        This is compute-heavy, so we rely on the cache here.
+        Attention: Make sure to invalidate the cache if you update the format!
         """
+        queryset = self.filter_queryset(self.get_queryset(eager=False, readonly=True))
+
+        page = self.paginate_queryset(queryset)
+        skip_slow_rendering = request.query_params.get("skip_slow_rendering") == "true"
+        data = []
+
+        for alert_group in page:
+            if alert_group.cached_render_for_web == {}:
+                # We can't return empty data to the web UI, so cache synchronously here.
+                if skip_slow_rendering:
+                    # Return dummy data only.
+                    # Caching is not scheduled here, because a regular request (which does
+                    # start caching) should follow the skip_slow_rendering one.
+                    data.append({"pk": alert_group.pk, "short": True})
+                else:
+                    # Synchronously cache and return. It could be slow.
+                    alert_group.cache_for_web(alert_group.channel.organization)
+                    data.append(alert_group.cached_render_for_web)
+            else:
+                data.append(alert_group.cached_render_for_web)
+                if not skip_slow_rendering:
+                    # Caching is not scheduled here, because a regular request (which does
+                    # start caching) should follow the skip_slow_rendering one.
+                    alert_group.schedule_cache_for_web()
+
+        return self.get_paginated_response(data)
+
+    def get_queryset(self, eager=True, readonly=False, order=True):
+        if readonly:
+            queryset = AlertGroup.unarchived_objects.using_readonly_db
+        else:
+            queryset = AlertGroup.unarchived_objects
 
-        # enrich alert groups with select_related and prefetch_related
-        alert_group_pks = [alert_group.pk for alert_group in alert_groups]
-        queryset = AlertGroup.all_objects.filter(pk__in=alert_group_pks).order_by("-pk")
-        queryset = self.get_serializer_class().setup_eager_loading(queryset)
-        alert_groups = list(queryset)
-
-        # get info on alerts count and last alert ID for every alert group
-        alerts_info = (
-            Alert.objects.values("group_id")
-            .filter(group_id__in=alert_group_pks)
-            .annotate(alerts_count=Count("group_id"), last_alert_id=Max("id"))
+        queryset = queryset.filter(
+            channel__organization=self.request.auth.organization,
+            channel__team=self.request.user.current_team,
         )
-        alerts_info_map = {info["group_id"]: info for info in alerts_info}
-
-        # fetch last alerts for every alert group
-        last_alert_ids = [info["last_alert_id"] for info in alerts_info_map.values()]
-        last_alerts = Alert.objects.filter(pk__in=last_alert_ids)
-        for alert in last_alerts:
-            # link group back to alert
-            alert.group = [alert_group for alert_group in alert_groups if alert_group.pk == alert.group_id][0]
-            alerts_info_map[alert.group_id].update({"last_alert": alert})
+        if order:
+            queryset = queryset.order_by("-started_at")
 
-        # add additional "alerts_count" and "last_alert" fields to
every alert group - for alert_group in alert_groups: - alert_group.last_alert = alerts_info_map[alert_group.pk]["last_alert"] - alert_group.alerts_count = alerts_info_map[alert_group.pk]["alerts_count"] + queryset = queryset.annotate(cached_render_for_web_str=Cast("cached_render_for_web", output_field=CharField())) - return alert_groups + if eager: + queryset = self.serializer_class.setup_eager_loading(queryset) + return queryset + + def get_alert_groups_and_days_for_previous_same_period(self): + prev_alert_groups = AlertGroup.unarchived_objects.none() + delta_days = None + + started_at = self.request.query_params.get("started_at", None) + if started_at is not None: + started_at_gte, started_at_lte = AlertGroupFilter.parse_custom_datetime_range(started_at) + delta_days = None + if started_at_lte is not None: + started_at_lte = forms.DateTimeField().to_python(started_at_lte) + else: + started_at_lte = datetime.now() + + if started_at_gte is not None: + started_at_gte = forms.DateTimeField().to_python(value=started_at_gte) + delta = started_at_lte.replace(tzinfo=None) - started_at_gte.replace(tzinfo=None) + prev_alert_groups = self.get_queryset().filter( + started_at__range=[started_at_gte - delta, started_at_gte] + ) + delta_days = delta.days + return prev_alert_groups, delta_days @action(detail=False) def stats(self, *args, **kwargs): - alert_groups = self.filter_queryset(self.get_queryset()) + alert_groups = self.filter_queryset(self.get_queryset(eager=False)) # Only count field is used, other fields left just in case for the backward compatibility return Response( { @@ -286,6 +324,7 @@ def acknowledge(self, request, pk): if alert_group.root_alert_group is not None: raise BadRequest(detail="Can't acknowledge an attached alert group") alert_group.acknowledge_by_user(self.request.user, action_source=ActionSource.WEB) + invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @@ -305,6 +344,7 @@ def unacknowledge(self, request, pk): raise BadRequest(detail="Can't unacknowledge a resolved alert group") alert_group.un_acknowledge_by_user(self.request.user, action_source=ActionSource.WEB) + invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @@ -325,6 +365,7 @@ def resolve(self, request, pk): status=status.HTTP_400_BAD_REQUEST, ) alert_group.resolve_by_user(self.request.user, action_source=ActionSource.WEB) + invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -340,6 +381,7 @@ def unresolve(self, request, pk): raise BadRequest(detail="The alert group is not resolved") alert_group.un_resolve_by_user(self.request.user, action_source=ActionSource.WEB) + invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -362,6 +404,8 @@ def attach(self, request, pk=None): return Response(status=status.HTTP_400_BAD_REQUEST) alert_group.attach_by_user(self.request.user, root_alert_group, action_source=ActionSource.WEB) + invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) + invalidate_web_cache_for_alert_group(alert_group_pk=root_alert_group.pk) return Response(AlertGroupSerializer(alert_group, 
context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -371,8 +415,10 @@ def unattach(self, request, pk=None): raise BadRequest(detail="Can't unattach maintenance alert group") if alert_group.is_root_alert_group: raise BadRequest(detail="Can't unattach an alert group because it is not attached") - + root_alert_group_pk = alert_group.root_alert_group_id alert_group.un_attach_by_user(self.request.user, action_source=ActionSource.WEB) + invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) + invalidate_web_cache_for_alert_group(alert_group_pk=root_alert_group_pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -387,6 +433,7 @@ def silence(self, request, pk=None): raise BadRequest(detail="Can't silence an attached alert group") alert_group.silence_by_user(request.user, silence_delay=delay, action_source=ActionSource.WEB) + invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": request}).data) @action(methods=["get"], detail=False) @@ -501,9 +548,9 @@ def bulk_action(self, request): raise BadRequest(detail="Please specify a delay for silence") kwargs["silence_delay"] = delay - alert_groups = AlertGroup.unarchived_objects.filter( - channel__organization=self.request.auth.organization, public_primary_key__in=alert_group_public_pks - ) + alert_groups = self.get_queryset(eager=False).filter(public_primary_key__in=alert_group_public_pks) + alert_group_pks = list(alert_groups.values_list("id", flat=True)) + invalidate_web_cache_for_alert_group(alert_group_pks=alert_group_pks) kwargs["user"] = self.request.user kwargs["alert_groups"] = alert_groups diff --git a/engine/apps/api/views/route_regex_debugger.py b/engine/apps/api/views/route_regex_debugger.py index ffa9cc717c..527684ac43 100644 --- a/engine/apps/api/views/route_regex_debugger.py +++ b/engine/apps/api/views/route_regex_debugger.py @@ -43,7 +43,10 @@ def get(self, request): if len(incidents_matching_regex) < MAX_INCIDENTS_TO_SHOW: first_alert = ag.alerts.all()[0] if re.search(regex, json.dumps(first_alert.raw_request_data)): - title = AlertWebRenderer(first_alert).render()["title"] + if ag.cached_render_for_web: + title = ag.cached_render_for_web["render_for_web"]["title"] + else: + title = AlertWebRenderer(first_alert).render()["title"] incidents_matching_regex.append( { "title": title, diff --git a/engine/apps/api/views/schedule.py b/engine/apps/api/views/schedule.py index 1de11b85aa..1b587210d5 100644 --- a/engine/apps/api/views/schedule.py +++ b/engine/apps/api/views/schedule.py @@ -190,9 +190,10 @@ def get_request_timezone(self): return user_tz, date - def _filter_events(self, schedule, timezone, starting_date, days, with_empty, with_gap): + def _filter_events(self, schedule, user_timezone, starting_date, days, with_empty, with_gap): shifts = ( - list_of_oncall_shifts_from_ical(schedule, starting_date, timezone, with_empty, with_gap, days=days) or [] + list_of_oncall_shifts_from_ical(schedule, starting_date, user_timezone, with_empty, with_gap, days=days) + or [] ) events = [] # for start, end, users, priority_level, source in shifts: diff --git a/engine/apps/base/models/user_notification_policy_log_record.py b/engine/apps/base/models/user_notification_policy_log_record.py index ed261b2b29..d8afed2db9 100644 --- a/engine/apps/base/models/user_notification_policy_log_record.py +++ 
b/engine/apps/base/models/user_notification_policy_log_record.py @@ -315,6 +315,7 @@ def render_log_line_action(self, for_slack=False, substitute_author_with_tag=Fal @receiver(post_save, sender=UserNotificationPolicyLogRecord) def listen_for_usernotificationpolicylogrecord_model_save(sender, instance, created, *args, **kwargs): + instance.alert_group.drop_cached_after_resolve_report_json() alert_group_pk = instance.alert_group.pk if instance.type != UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FINISHED: logger.debug( diff --git a/engine/apps/public_api/tests/test_incidents.py b/engine/apps/public_api/tests/test_incidents.py index ea1198a048..d43a1fb849 100644 --- a/engine/apps/public_api/tests/test_incidents.py +++ b/engine/apps/public_api/tests/test_incidents.py @@ -32,7 +32,7 @@ def construct_expected_response_from_incidents(incidents): "id": incident.public_primary_key, "integration_id": incident.channel.public_primary_key, "route_id": incident.channel_filter.public_primary_key, - "alerts_count": incident.alerts.count(), + "alerts_count": incident.alerts_count, "state": incident.state, "created_at": created_at, "resolved_at": resolved_at, diff --git a/engine/apps/slack/scenarios/alertgroup_appearance.py b/engine/apps/slack/scenarios/alertgroup_appearance.py index 588b70d0e0..1ccba05f65 100644 --- a/engine/apps/slack/scenarios/alertgroup_appearance.py +++ b/engine/apps/slack/scenarios/alertgroup_appearance.py @@ -247,6 +247,10 @@ def process_scenario(self, slack_user_identity, slack_team_identity, payload, ac if new_value is None and old_value is not None: setattr(alert_receive_channel, attr_name, None) alert_receive_channel.save() + # Drop caches for current alert group + if notification_channel == "web": + setattr(alert_group, f"cached_render_for_web_{templatizable_attr}", None) + alert_group.save() elif new_value is not None: default_values = getattr( AlertReceiveChannel, @@ -261,10 +265,18 @@ def process_scenario(self, slack_user_identity, slack_team_identity, payload, ac jinja_template_env.from_string(new_value) setattr(alert_receive_channel, attr_name, new_value) alert_receive_channel.save() + # Drop caches for current alert group + if notification_channel == "web": + setattr(alert_group, f"cached_render_for_web_{templatizable_attr}", None) + alert_group.save() elif default_value is not None and new_value.strip() == default_value.strip(): new_value = None setattr(alert_receive_channel, attr_name, new_value) alert_receive_channel.save() + # Drop caches for current alert group + if notification_channel == "web": + setattr(alert_group, f"cached_render_for_web_{templatizable_attr}", None) + alert_group.save() except TemplateSyntaxError: return Response( {"response_action": "errors", "errors": {attr_name: "Template has incorrect format"}}, diff --git a/engine/apps/slack/scenarios/resolution_note.py b/engine/apps/slack/scenarios/resolution_note.py index f6c78305c6..364704b7cd 100644 --- a/engine/apps/slack/scenarios/resolution_note.py +++ b/engine/apps/slack/scenarios/resolution_note.py @@ -674,6 +674,7 @@ def process_scenario(self, slack_user_identity, slack_team_identity, payload, ac add_to_resolution_note = True if value["msg_value"].startswith("add") else False slack_thread_message = None resolution_note = None + drop_ag_cache = False alert_group = AlertGroup.all_objects.get(pk=alert_group_pk) @@ -694,6 +695,7 @@ def process_scenario(self, slack_user_identity, slack_team_identity, payload, ac else: resolution_note.recreate() 
diff --git a/engine/apps/slack/scenarios/resolution_note.py b/engine/apps/slack/scenarios/resolution_note.py
index f6c78305c6..364704b7cd 100644
--- a/engine/apps/slack/scenarios/resolution_note.py
+++ b/engine/apps/slack/scenarios/resolution_note.py
@@ -674,6 +674,7 @@ def process_scenario(self, slack_user_identity, slack_team_identity, payload, ac
         add_to_resolution_note = True if value["msg_value"].startswith("add") else False
         slack_thread_message = None
         resolution_note = None
+        drop_ag_cache = False
 
         alert_group = AlertGroup.all_objects.get(pk=alert_group_pk)
 
@@ -694,6 +695,7 @@ def process_scenario(self, slack_user_identity, slack_team_identity, payload, ac
             else:
                 resolution_note.recreate()
             self.add_resolution_note_reaction(slack_thread_message)
+            drop_ag_cache = True
         elif not add_to_resolution_note:
             # Check if resolution_note can be removed
             if (
@@ -718,9 +720,13 @@ def process_scenario(self, slack_user_identity, slack_team_identity, payload, ac
                 slack_thread_message.added_to_resolution_note = False
                 slack_thread_message.save(update_fields=["added_to_resolution_note"])
                 self.remove_resolution_note_reaction(slack_thread_message)
+                drop_ag_cache = True
         self.update_alert_group_resolution_note_button(
             alert_group,
         )
+        if drop_ag_cache:
+            alert_group.drop_cached_after_resolve_report_json()
+            alert_group.schedule_cache_for_web()
         resolution_note_data = json.loads(payload["actions"][0]["value"])
         resolution_note_data["resolution_note_window_action"] = "edit_update"
         ResolutionNoteModalStep(slack_team_identity, self.organization, self.user).process_scenario(
diff --git a/engine/apps/user_management/models/user.py b/engine/apps/user_management/models/user.py
index 84794e2e32..ca769243ac 100644
--- a/engine/apps/user_management/models/user.py
+++ b/engine/apps/user_management/models/user.py
@@ -8,6 +8,7 @@
 from django.dispatch import receiver
 from emoji import demojize
 
+from apps.alerts.tasks import invalidate_web_cache_for_alert_group
 from apps.schedules.tasks import drop_cached_ical_for_custom_events_for_organization
 from common.constants.role import Role
 from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length
@@ -263,3 +264,5 @@ def listen_for_user_model_save(sender, instance, created, *args, **kwargs):
         drop_cached_ical_for_custom_events_for_organization.apply_async(
             (instance.organization_id,),
         )
+    logger.info(f"Drop AG cache. Reason: save user {instance.pk}")
+    invalidate_web_cache_for_alert_group.apply_async(kwargs={"org_pk": instance.organization_id})
diff --git a/engine/common/api_helpers/paginators.py b/engine/common/api_helpers/paginators.py
index 01ce2cc6c6..023f2294c3 100644
--- a/engine/common/api_helpers/paginators.py
+++ b/engine/common/api_helpers/paginators.py
@@ -1,4 +1,4 @@
-from rest_framework.pagination import CursorPagination, PageNumberPagination
+from rest_framework.pagination import PageNumberPagination
 
 
 class HundredPageSizePaginator(PageNumberPagination):
@@ -11,10 +11,3 @@ class FiftyPageSizePaginator(PageNumberPagination):
 
 class TwentyFivePageSizePaginator(PageNumberPagination):
     page_size = 25
-
-
-class TwentyFiveCursorPaginator(CursorPagination):
-    page_size = 25
-    max_page_size = 100
-    page_size_query_param = "perpage"
-    ordering = "-pk"
diff --git a/engine/common/mixins/use_random_readonly_db_manager_mixin.py b/engine/common/mixins/use_random_readonly_db_manager_mixin.py
new file mode 100644
index 0000000000..46559aa414
--- /dev/null
+++ b/engine/common/mixins/use_random_readonly_db_manager_mixin.py
@@ -0,0 +1,21 @@
+import random
+
+from django.conf import settings
+
+
+class UseRandomReadonlyDbManagerMixin:
+    """
+    Use this mixin in model managers when reads should go to a random read-only replica.
+    """
+
+    @property
+    def using_readonly_db(self):
+        """Select one of the read-only databases this queryset should execute against."""
+        if hasattr(settings, "READONLY_DATABASES") and len(settings.READONLY_DATABASES) > 0:
+            using_db = random.choice(list(settings.READONLY_DATABASES.keys()))
+            return self.using(using_db)
+        else:
+            # Use the "default" database.
+            # Django uses the database with the "default" alias when no other database has been selected.
+            # https://docs.djangoproject.com/en/3.2/topics/db/multi-db/#defining-your-databases
+            return self.using("default")
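
# --- Illustration (not part of the patch) ------------------------------------
# Usage sketch for UseRandomReadonlyDbManagerMixin. The Book model and its
# manager are invented; only the mixin and its using_readonly_db property come
# from the new file above.
from django.db import models

from common.mixins.use_random_readonly_db_manager_mixin import UseRandomReadonlyDbManagerMixin


class BookManager(UseRandomReadonlyDbManagerMixin, models.Manager):
    pass


class Book(models.Model):
    title = models.CharField(max_length=100)

    objects = BookManager()


# Reads then target a random replica, or "default" when READONLY_DATABASES is
# empty or absent:
# Book.objects.using_readonly_db.filter(title__icontains="django")
# ------------------------------------------------------------------------------
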
diff --git a/engine/settings/dev.py b/engine/settings/dev.py
index b5e0e2f5ba..d63b6f7473 100644
--- a/engine/settings/dev.py
+++ b/engine/settings/dev.py
@@ -32,6 +32,10 @@
 
 TESTING = "pytest" in sys.modules or "unittest" in sys.modules
 
+READONLY_DATABASES = {}
+
+# Dict union operator, introduced in Python 3.9 (PEP 584)
+DATABASES = DATABASES | READONLY_DATABASES
 
 CACHES = {
     "default": {
diff --git a/engine/settings/prod_without_db.py b/engine/settings/prod_without_db.py
index 5b8a83b4f6..60b4cc288e 100644
--- a/engine/settings/prod_without_db.py
+++ b/engine/settings/prod_without_db.py
@@ -84,11 +84,12 @@ def on_uwsgi_worker_exit():
     "apps.alerts.tasks.create_contact_points_for_datasource.create_contact_points_for_datasource": {"queue": "default"},
     "apps.alerts.tasks.sync_grafana_alerting_contact_points.sync_grafana_alerting_contact_points": {"queue": "default"},
     "apps.alerts.tasks.delete_alert_group.delete_alert_group": {"queue": "default"},
-    "apps.alerts.tasks.invalidate_web_cache_for_alert_group.invalidate_web_cache_for_alert_group": {
-        "queue": "default"
-    },  # todo: remove
+    "apps.alerts.tasks.invalidate_web_cache_for_alert_group.invalidate_web_cache_for_alert_group": {"queue": "default"},
     "apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal": {"queue": "default"},
     "apps.alerts.tasks.wipe.wipe": {"queue": "default"},
+    # TODO: remove cache_alert_group_for_web and schedule_cache_for_alert_group once existing tasks have been processed
+    "apps.api.tasks.cache_alert_group_for_web": {"queue": "default"},
+    "apps.api.tasks.schedule_cache_for_alert_group": {"queue": "default"},
     "apps.heartbeat.tasks.heartbeat_checkup": {"queue": "default"},
     "apps.heartbeat.tasks.integration_heartbeat_checkup": {"queue": "default"},
    "apps.heartbeat.tasks.process_heartbeat_task": {"queue": "default"},
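
# --- Illustration (not part of the patch) ------------------------------------
# A hypothetical non-empty READONLY_DATABASES that the dict-union line in
# dev.py would merge into DATABASES. Every alias, host, and credential below
# is invented; dev.py itself ships an empty dict.
READONLY_DATABASES = {
    "readonly_replica_1": {
        "ENGINE": "django.db.backends.mysql",
        "NAME": "oncall",
        "USER": "oncall_ro",
        "PASSWORD": "...",
        "HOST": "replica-1.db.internal",
        "PORT": "3306",
    },
}

DATABASES = DATABASES | READONLY_DATABASES  # requires Python 3.9+
# ------------------------------------------------------------------------------
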
diff --git a/grafana-plugin/src/components/CursorPagination/CursorPagination.module.css b/grafana-plugin/src/components/CursorPagination/CursorPagination.module.css
deleted file mode 100644
index 63d08ecceb..0000000000
--- a/grafana-plugin/src/components/CursorPagination/CursorPagination.module.css
+++ /dev/null
@@ -1,3 +0,0 @@
-.root {
-  display: block;
-}
diff --git a/grafana-plugin/src/components/CursorPagination/CursorPagination.tsx b/grafana-plugin/src/components/CursorPagination/CursorPagination.tsx
deleted file mode 100644
index 33b228e126..0000000000
--- a/grafana-plugin/src/components/CursorPagination/CursorPagination.tsx
+++ /dev/null
@@ -1,79 +0,0 @@
-import React, { FC, useCallback, useEffect, useState } from 'react';
-
-import { SelectableValue } from '@grafana/data';
-import { Button, HorizontalGroup, Icon, Select } from '@grafana/ui';
-import cn from 'classnames/bind';
-
-import Text from 'components/Text/Text';
-
-import styles from './CursorPagination.module.css';
-
-interface CursorPaginationProps {
-  current: string;
-  onChange: (cursor: string, direction: 'prev' | 'next') => void;
-  itemsPerPageOptions: Array<SelectableValue<number>>;
-  itemsPerPage: number;
-  onChangeItemsPerPage: (value: number) => void;
-  prev: string;
-  next: string;
-}
-
-const cx = cn.bind(styles);
-
-const CursorPagination: FC<CursorPaginationProps> = (props) => {
-  const { current, onChange, prev, next, itemsPerPage, itemsPerPageOptions, onChangeItemsPerPage } = props;
-
-  const [disabled, setDisabled] = useState<boolean>(false);
-
-  useEffect(() => {
-    setDisabled(false);
-  }, [prev, next]);
-
-  const onChangeItemsPerPageCallback = useCallback((option) => {
-    setDisabled(true);
-    onChangeItemsPerPage(option.value);
-  }, []);
-
-  return (
-    <HorizontalGroup>
-      <Text>Items per list</Text>