Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
joeyorlando committed Nov 21, 2024
1 parent ee013d8 commit 6229ff3
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 196 deletions.
16 changes: 0 additions & 16 deletions engine/apps/slack/alert_group_slack_service.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import logging
import typing

from django.core.cache import cache

from apps.slack.client import SlackClient
from apps.slack.errors import (
SlackAPICantUpdateMessageError,
Expand All @@ -25,11 +23,6 @@
class AlertGroupSlackService:
_slack_client: SlackClient

UPDATE_ALERT_GROUP_DEBOUNCE_INTERVAL_SECONDS = 30
"""
Time in seconds to wait before allowing the next update to the Alert Group slack message
"""

def __init__(
self,
slack_team_identity: "SlackTeamIdentity",
Expand All @@ -43,15 +36,9 @@ def __init__(

def update_alert_group_slack_message(self, alert_group: "AlertGroup") -> None:
alert_group_pk = alert_group.pk
debounce_alert_group_update_cache_key = f"debounce_update_alert_group_slack_message_{alert_group_pk}"

logger.info(f"Update message for alert_group {alert_group_pk}")

# Check if the method has been called recently for this alert_group, if so skip to avoid approaching rate limits
if cache.get(debounce_alert_group_update_cache_key):
logger.info(f"Skipping update for alert_group {alert_group_pk} due to debounce interval")
return

try:
self._slack_client.chat_update(
channel=alert_group.slack_message.channel_id,
Expand All @@ -78,9 +65,6 @@ def update_alert_group_slack_message(self, alert_group: "AlertGroup") -> None:
SlackAPIChannelNotFoundError,
):
pass
finally:
# Set the cache key to enforce debounce interval
cache.set(debounce_alert_group_update_cache_key, True, self.UPDATE_ALERT_GROUP_DEBOUNCE_INTERVAL_SECONDS)

def publish_message_to_alert_group_thread(
self, alert_group: "AlertGroup", attachments=None, mrkdwn=True, unfurl_links=True, text=None
Expand Down
1 change: 0 additions & 1 deletion engine/apps/slack/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

SLACK_RATE_LIMIT_TIMEOUT = datetime.timedelta(minutes=5)
SLACK_RATE_LIMIT_DELAY = 10
CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME = 60 * 10

BLOCK_SECTION_TEXT_MAX_SIZE = 2800
PRIVATE_METADATA_MAX_LENGTH = 3000
Expand Down
68 changes: 63 additions & 5 deletions engine/apps/slack/models/slack_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import uuid

from django.db import models
from django.utils import timezone

from apps.slack.client import SlackClient
from apps.slack.constants import BLOCK_SECTION_TEXT_MAX_SIZE
Expand All @@ -15,9 +16,12 @@
SlackAPIRatelimitError,
SlackAPITokenError,
)
from apps.slack.tasks import update_alert_group_slack_message

if typing.TYPE_CHECKING:
from apps.alerts.models import AlertGroup
from apps.base.models import UserNotificationPolicy
from apps.user_management.models import User

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
Expand All @@ -26,6 +30,8 @@
class SlackMessage(models.Model):
alert_group: typing.Optional["AlertGroup"]

ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS = 45

id = models.CharField(primary_key=True, default=uuid.uuid4, editable=False, max_length=36)

slack_id = models.CharField(max_length=100)
Expand All @@ -46,11 +52,9 @@ class SlackMessage(models.Model):
)

ack_reminder_message_ts = models.CharField(max_length=100, null=True, default=None)

created_at = models.DateTimeField(auto_now_add=True)

cached_permalink = models.URLField(max_length=250, null=True, default=None)

created_at = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(null=True, default=None)

alert_group = models.ForeignKey(
Expand All @@ -61,8 +65,10 @@ class SlackMessage(models.Model):
related_name="slack_messages",
)

# ID of a latest celery task to update the message
active_update_task_id = models.CharField(max_length=100, null=True, default=None)
"""
ID of the latest celery task to update the message
"""

class Meta:
# slack_id is unique within the context of a channel or conversation
Expand Down Expand Up @@ -105,7 +111,12 @@ def permalink(self) -> typing.Optional[str]:
def deep_link(self) -> str:
return f"https://slack.com/app_redirect?channel={self.channel_id}&team={self.slack_team_identity.slack_id}&message={self.slack_id}"

def send_slack_notification(self, user, alert_group, notification_policy):
def send_slack_notification(
self,
user: "User",
alert_group: "AlertGroup",
notification_policy: "UserNotificationPolicy",
) -> None:
from apps.base.models import UserNotificationPolicyLogRecord

slack_message = alert_group.slack_message
Expand Down Expand Up @@ -220,3 +231,50 @@ def send_slack_notification(self, user, alert_group, notification_policy):
slack_user_identity.send_link_to_slack_message(slack_message)
except (SlackAPITokenError, SlackAPIMethodNotSupportedForChannelTypeError):
pass

def update_alert_groups_message(self) -> None:
    """
    Schedule a debounced update of the associated alert group's Slack message.

    Updates are spaced out by `ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS` so the Slack message
    is not refreshed too frequently. The method:

    - Returns early (with a warning) if this message has no alert group attached.
    - Returns early if `active_update_task_id` is already set, so that only one update task is
      pending at a time.
    - Computes how long ago the message was last updated (`last_updated`) and derives the remaining
      part of the debounce interval. A `last_updated` of `None` is treated as "updated just now",
      i.e. the full debounce interval is applied.
    - Schedules `update_alert_group_slack_message` with that countdown, never less than 10 seconds.
    - Persists the scheduled task's id in `active_update_task_id`.

    NOTE(review): resetting `active_update_task_id` is not done here - presumably the scheduled
    task clears it once it has run; confirm against the task implementation.
    """
    if not self.alert_group:
        logger.warning(
            f"skipping update_alert_groups_message as SlackMessage {self.pk} has no alert_group associated with it"
        )
        return

    if self.active_update_task_id:
        logger.info(
            f"skipping update_alert_groups_message as SlackMessage {self.pk} has an active update task {self.active_update_task_id}"
        )
        return

    now = timezone.now()

    # we previously weren't updating the last_updated field for messages, so there will be cases
    # where the last_updated field is None
    last_updated = self.last_updated or now

    time_since_last_update = (now - last_updated).total_seconds()
    remaining_time = self.ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS - time_since_last_update
    # always wait at least 10 seconds, even when the debounce interval has already elapsed
    countdown = max(remaining_time, 10)

    logger.info(
        f"updating message for alert_group {self.alert_group.pk} in {countdown} seconds "
        f"(debounce interval: {self.ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS})"
    )

    # apply_async returns an AsyncResult object; store only its string id so that the
    # CharField holds a plain task id both in memory and in the database
    scheduled_task = update_alert_group_slack_message.apply_async((self.pk,), countdown=countdown)
    self.active_update_task_id = scheduled_task.id
    self.save(update_fields=["active_update_task_id"])
80 changes: 45 additions & 35 deletions engine/apps/slack/scenarios/distribute_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@
import typing
from datetime import datetime

from django.core.cache import cache

from apps.alerts.constants import ActionSource
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
from apps.alerts.incident_appearance.renderers.slack_renderer import AlertSlackRenderer
from apps.alerts.models import Alert, AlertGroup, AlertGroupLogRecord, AlertReceiveChannel, Invitation
from apps.api.permissions import RBACPermission
from apps.slack.chatops_proxy_routing import make_private_metadata, make_value
from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME
from apps.slack.errors import (
SlackAPIChannelArchivedError,
SlackAPIChannelNotFoundError,
Expand All @@ -24,7 +21,7 @@
)
from apps.slack.scenarios import scenario_step
from apps.slack.slack_formatter import SlackFormatter
from apps.slack.tasks import send_message_to_thread_if_bot_not_in_channel, update_incident_slack_message
from apps.slack.tasks import send_message_to_thread_if_bot_not_in_channel
from apps.slack.types import (
Block,
BlockActionType,
Expand All @@ -35,7 +32,6 @@
PayloadType,
ScenarioRoute,
)
from apps.slack.utils import get_cache_key_update_incident_slack_message
from common.utils import clean_markup, is_string_with_visible_characters

from .step_mixins import AlertGroupActionsMixin
Expand Down Expand Up @@ -90,18 +86,25 @@ def process_signal(self, alert: Alert) -> None:
self._send_message_to_thread_if_bot_not_in_channel(alert.group, channel_id)
else:
# check if alert group was posted to slack before updating its message
if not alert.group.skip_escalation_in_slack:
update_task_id = update_incident_slack_message.apply_async(
(self.slack_team_identity.pk, alert.group.pk),
countdown=10,
alert_group = alert.group
if not alert_group:
# this case should hypothetically never happen, it's mostly to appease mypy with the
# fact that alert.group can "technically" be None
logger.info(
f"Skip updating alert group in Slack because alert {alert.pk} doesn't actually "
"have an alert group associated with it"
)
cache.set(
get_cache_key_update_incident_slack_message(alert.group.pk),
update_task_id,
timeout=CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME,
return

alert_group_slack_message = alert_group.slack_message
if not alert_group_slack_message:
logger.info(
f"Skip updating alert group in Slack because alert_group {alert_group.pk} doesn't "
"have a slack message associated with it"
)
else:
logger.info("Skip updating alert_group in Slack due to rate limit")
return

alert_group_slack_message.update_alert_groups_message()

def _send_first_alert(self, alert: Alert, channel_id: str) -> None:
attachments = alert.group.render_slack_attachments()
Expand Down Expand Up @@ -236,13 +239,14 @@ def process_scenario(
# for old version with user slack_id selection
warning_text = "Oops! Something goes wrong, please try again"
self.open_warning_window(payload, warning_text)

if selected_user is not None:
Invitation.invite_user(selected_user, alert_group, self.user)
else:
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
alert_group.slack_message.update_alert_groups_message()

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
log_record.alert_group.slack_message.update_alert_groups_message()


class SilenceGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand Down Expand Up @@ -272,7 +276,7 @@ def process_scenario(
)

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
log_record.alert_group.slack_message.update_alert_groups_message()


class UnSilenceGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand All @@ -293,7 +297,7 @@ def process_scenario(
alert_group.un_silence_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
log_record.alert_group.slack_message.update_alert_groups_message()


class SelectAttachGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand Down Expand Up @@ -464,7 +468,7 @@ def process_signal(self, log_record: AlertGroupLogRecord) -> None:
unfurl_links=True,
)

self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
alert_group.slack_message.update_alert_groups_message()

def process_scenario(
self,
Expand Down Expand Up @@ -534,7 +538,7 @@ def process_scenario(
alert_group.un_attach_by_user(self.user, action_source=ActionSource.SLACK)

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
log_record.alert_group.slack_message.update_alert_groups_message()


class StopInvitationProcess(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand Down Expand Up @@ -567,7 +571,7 @@ def process_scenario(
Invitation.stop_invitation(invitation_id, self.user)

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.invitation.alert_group)
log_record.alert_group.slack_message.update_alert_groups_message()


class ResolveGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand Down Expand Up @@ -605,11 +609,10 @@ def process_scenario(
alert_group.resolve_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
alert_group = log_record.alert_group
# Do not rerender alert_groups which happened while maintenance.
# They have no slack messages, since they just attached to the maintenance incident.
if not alert_group.happened_while_maintenance:
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
if not log_record.alert_group.happened_while_maintenance:
log_record.alert_group.slack_message.update_alert_groups_message()


class UnResolveGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand All @@ -630,7 +633,7 @@ def process_scenario(
alert_group.un_resolve_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
log_record.alert_group.slack_message.update_alert_groups_message()


class AcknowledgeGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand All @@ -651,7 +654,7 @@ def process_scenario(
alert_group.acknowledge_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)

def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
log_record.alert_group.slack_message.update_alert_groups_message()


class UnAcknowledgeGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Expand All @@ -678,7 +681,9 @@ def process_signal(self, log_record: AlertGroupLogRecord) -> None:
logger.debug(f"Started process_signal in UnAcknowledgeGroupStep for alert_group {alert_group.pk}")

if log_record.type == AlertGroupLogRecord.TYPE_AUTO_UN_ACK:
channel_id = alert_group.slack_message.channel_id
alert_group_slack_message = alert_group.slack_message
channel_id = alert_group_slack_message.channel_id

if log_record.author is not None:
user_verbal = log_record.author.get_username_with_slack_verbal(mention=True)
else:
Expand All @@ -695,11 +700,12 @@ def process_signal(self, log_record: AlertGroupLogRecord) -> None:
f"{user_verbal} hasn't responded to an acknowledge timeout reminder."
f" Alert Group is unacknowledged automatically."
)
if alert_group.slack_message.ack_reminder_message_ts:

if alert_group_slack_message.ack_reminder_message_ts:
try:
self._slack_client.chat_update(
channel=channel_id,
ts=alert_group.slack_message.ack_reminder_message_ts,
ts=alert_group_slack_message.ack_reminder_message_ts,
text=text,
attachments=message_attachments,
)
Expand All @@ -714,7 +720,9 @@ def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.publish_message_to_alert_group_thread(
alert_group, attachments=message_attachments, text=text
)
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)

alert_group_slack_message.update_alert_groups_message()

logger.debug(f"Finished process_signal in UnAcknowledgeGroupStep for alert_group {alert_group.pk}")


Expand Down Expand Up @@ -825,10 +833,12 @@ def process_signal(self, log_record: AlertGroupLogRecord) -> None:
class WipeGroupStep(scenario_step.ScenarioStep):
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
alert_group = log_record.alert_group
user_verbal = log_record.author.get_username_with_slack_verbal()
text = f"Wiped by {user_verbal}"
self.alert_group_slack_service.publish_message_to_alert_group_thread(alert_group, text=text)
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)

self.alert_group_slack_service.publish_message_to_alert_group_thread(
alert_group,
text=f"Wiped by {log_record.author.get_username_with_slack_verbal()}",
)
alert_group.slack_message.update_alert_groups_message()


class DeleteGroupStep(scenario_step.ScenarioStep):
Expand Down
Loading

0 comments on commit 6229ff3

Please sign in to comment.