Skip to content

Commit

Permalink
fix(aci milestone 3): updates to dual writing data conditions (#83275)
Browse files Browse the repository at this point in the history
When dual writing `AlertRuleTrigger`s to the ACI tables, we must create
2 data conditions:
1. The "detector trigger," which compares against the trigger threshold
value and returns a `DetectorPriorityLevel` as its condition result
2. The "action filter," which checks the detector status and returns
`True` as its condition result. Each of these "action filters" has its
own `DataConditionGroup` as well, which is linked back to the legacy
alert rule's associated workflow.

Implement this paradigm. Also add logic to automatically set a resolve
threshold if it's not set by the user on the legacy alert rule.
  • Loading branch information
mifu67 authored and andrewshie-sentry committed Jan 22, 2025
1 parent 993d7d4 commit f675818
Show file tree
Hide file tree
Showing 2 changed files with 226 additions and 31 deletions.
127 changes: 110 additions & 17 deletions src/sentry/workflow_engine/migration_helpers/alert_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,46 @@ def migrate_metric_action(
return action, data_condition_group_action


def migrate_metric_data_condition(
def migrate_metric_data_conditions(
alert_rule_trigger: AlertRuleTrigger,
) -> tuple[DataCondition, AlertRuleTriggerDataCondition] | None:
) -> tuple[DataCondition, DataCondition, AlertRuleTriggerDataCondition] | None:
alert_rule = alert_rule_trigger.alert_rule
# create a data condition for the Detector's data condition group with the
# threshold and associated priority level
alert_rule_detector = AlertRuleDetector.objects.select_related(
"detector__workflow_condition_group"
).get(alert_rule=alert_rule)
detector = alert_rule_detector.detector
detector_data_condition_group = detector.workflow_condition_group
if detector_data_condition_group is None:
logger.error("workflow_condition_group does not exist", extra={"detector": detector})
return None

threshold_type = (
Condition.GREATER
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value
else Condition.LESS
)

detector_trigger = DataCondition.objects.create(
comparison=alert_rule_trigger.alert_threshold,
condition_result=(
DetectorPriorityLevel.MEDIUM
if alert_rule_trigger.label == "warning"
else DetectorPriorityLevel.HIGH
),
type=threshold_type,
condition_group=detector_data_condition_group,
)

# create an "action filter": if the detector's status matches a certain priority level,
# then the condition result is set to true
data_condition_group = DataConditionGroup.objects.create(
organization_id=alert_rule.organization_id
)
alert_rule_workflow = AlertRuleWorkflow.objects.get(alert_rule=alert_rule)
alert_rule_workflow = AlertRuleWorkflow.objects.select_related("workflow").get(
alert_rule=alert_rule
)
WorkflowDataConditionGroup.objects.create(
condition_group=data_condition_group,
workflow=alert_rule_workflow.workflow,
Expand All @@ -120,13 +151,82 @@ def migrate_metric_data_condition(
alert_rule_trigger_data_condition = AlertRuleTriggerDataCondition.objects.create(
alert_rule_trigger=alert_rule_trigger, data_condition=data_condition
)
return data_condition, alert_rule_trigger_data_condition
return detector_trigger, data_condition, alert_rule_trigger_data_condition


def migrate_resolve_threshold_data_condition(alert_rule: AlertRule) -> DataCondition:
def get_resolve_threshold(detector_data_condition_group: DataConditionGroup) -> float:
"""
Create data conditions for rules with a resolve threshold
Helper method to get the resolve threshold for a Detector if none is specified on
the legacy AlertRule.
"""
detector_triggers = DataCondition.objects.filter(condition_group=detector_data_condition_group)
if detector_triggers.count() > 1:
# there is a warning threshold for this detector, so we should resolve based on it
warning_data_condition = detector_triggers.filter(
condition_result=DetectorPriorityLevel.MEDIUM
).first()
if warning_data_condition is None:
logger.error(
"more than one detector trigger in detector data condition group, but no warning condition exists",
extra={"detector_data_condition_group": detector_triggers},
)
return -1
resolve_threshold = warning_data_condition.comparison
else:
# critical threshold value
critical_data_condition = detector_triggers.first()
if critical_data_condition is None:
logger.error(
"no data conditions exist for detector data condition group",
extra={"detector_data_condition_group": detector_triggers},
)
return -1
resolve_threshold = critical_data_condition.comparison

return resolve_threshold


def migrate_resolve_threshold_data_conditions(
alert_rule: AlertRule,
) -> tuple[DataCondition, DataCondition] | None:
"""
Create data conditions for the old world's "resolve" threshold. If a resolve threshold
has been explicitly set on the alert rule, then use this as our comparison value. Otherwise,
we need to figure out what the resolve threshold is based on the trigger threshold values.
"""
alert_rule_detector = AlertRuleDetector.objects.select_related(
"detector__workflow_condition_group"
).get(alert_rule=alert_rule)
detector = alert_rule_detector.detector
detector_data_condition_group = detector.workflow_condition_group
if detector_data_condition_group is None:
logger.error("workflow_condition_group does not exist", extra={"detector": detector})
return None

# XXX: we set the resolve trigger's threshold_type to whatever the opposite of the rule's threshold_type is
# e.g. if the rule has a critical trigger ABOVE some number, the resolve threshold is automatically set to BELOW
threshold_type = (
Condition.LESS_OR_EQUAL
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value
else Condition.GREATER_OR_EQUAL
)

if alert_rule.resolve_threshold is not None:
resolve_threshold = alert_rule.resolve_threshold
else:
# figure out the resolve threshold ourselves
resolve_threshold = get_resolve_threshold(detector_data_condition_group)
if resolve_threshold == -1:
# something went wrong
return None

detector_trigger = DataCondition.objects.create(
comparison=resolve_threshold,
condition_result=DetectorPriorityLevel.OK,
type=threshold_type,
condition_group=detector_data_condition_group,
)

data_condition_group = DataConditionGroup.objects.create(
organization_id=alert_rule.organization_id
)
Expand All @@ -135,22 +235,15 @@ def migrate_resolve_threshold_data_condition(alert_rule: AlertRule) -> DataCondi
condition_group=data_condition_group,
workflow=alert_rule_workflow.workflow,
)
threshold_type = (
Condition.LESS
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value
else Condition.GREATER
)
# XXX: we set the resolve trigger's threshold_type to whatever the opposite of the rule's threshold_type is
# e.g. if the rule has a critical trigger ABOVE some number, the resolve threshold is automatically set to BELOW

data_condition = DataCondition.objects.create(
comparison=alert_rule.resolve_threshold,
condition_result=DetectorPriorityLevel.OK,
type=threshold_type,
comparison=DetectorPriorityLevel.OK,
condition_result=True,
type=Condition.ISSUE_PRIORITY_EQUALS,
condition_group=data_condition_group,
)
# XXX: can't make an AlertRuleTriggerDataCondition since this isn't really a trigger
return data_condition
return detector_trigger, data_condition


def create_metric_alert_lookup_tables(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from sentry.deletions.tasks.scheduled import run_scheduled_deletions
from sentry.incidents.grouptype import MetricAlertFire
from sentry.incidents.models.alert_rule import AlertRuleTriggerAction
from sentry.incidents.models.alert_rule import AlertRuleThresholdType, AlertRuleTriggerAction
from sentry.integrations.models.integration import Integration
from sentry.integrations.models.organization_integration import OrganizationIntegration
from sentry.snuba.models import QuerySubscription
Expand All @@ -11,10 +11,11 @@
from sentry.users.services.user.service import user_service
from sentry.workflow_engine.migration_helpers.alert_rule import (
dual_delete_migrated_alert_rule,
get_resolve_threshold,
migrate_alert_rule,
migrate_metric_action,
migrate_metric_data_condition,
migrate_resolve_threshold_data_condition,
migrate_metric_data_conditions,
migrate_resolve_threshold_data_conditions,
)
from sentry.workflow_engine.models import (
Action,
Expand Down Expand Up @@ -194,9 +195,9 @@ def test_create_metric_alert_trigger(self):
Test that when we call the helper methods we create all the ACI models correctly for alert rule triggers
"""
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_condition(self.alert_rule_trigger_warning)
migrate_metric_data_condition(self.alert_rule_trigger_critical)
migrate_resolve_threshold_data_condition(self.metric_alert)
migrate_metric_data_conditions(self.alert_rule_trigger_warning)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)
migrate_resolve_threshold_data_conditions(self.metric_alert)

assert (
AlertRuleTriggerDataCondition.objects.filter(
Expand All @@ -207,12 +208,38 @@ def test_create_metric_alert_trigger(self):
).count()
== 2
)
detector_triggers = DataCondition.objects.filter(
comparison__in=[
self.alert_rule_trigger_warning.alert_threshold,
self.alert_rule_trigger_critical.alert_threshold,
self.metric_alert.resolve_threshold,
]
)

assert len(detector_triggers) == 3
detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert).detector

warning_detector_trigger = detector_triggers[0]
critical_detector_trigger = detector_triggers[1]
resolve_detector_trigger = detector_triggers[2]

assert warning_detector_trigger.type == Condition.GREATER
assert warning_detector_trigger.condition_result == DetectorPriorityLevel.MEDIUM
assert warning_detector_trigger.condition_group == detector.workflow_condition_group

assert critical_detector_trigger.type == Condition.GREATER
assert critical_detector_trigger.condition_result == DetectorPriorityLevel.HIGH
assert critical_detector_trigger.condition_group == detector.workflow_condition_group

assert resolve_detector_trigger.type == Condition.LESS_OR_EQUAL
assert resolve_detector_trigger.condition_result == DetectorPriorityLevel.OK
assert resolve_detector_trigger.condition_group == detector.workflow_condition_group

data_conditions = DataCondition.objects.filter(
comparison__in=[
DetectorPriorityLevel.MEDIUM,
DetectorPriorityLevel.HIGH,
self.metric_alert.resolve_threshold,
DetectorPriorityLevel.OK,
]
)
assert len(data_conditions) == 3
Expand All @@ -236,19 +263,94 @@ def test_create_metric_alert_trigger(self):
condition_group=critical_data_condition.condition_group
).exists()

assert resolve_data_condition.type == Condition.LESS
assert resolve_data_condition.comparison == self.metric_alert.resolve_threshold
assert resolve_data_condition.condition_result == DetectorPriorityLevel.OK
assert resolve_data_condition.type == Condition.ISSUE_PRIORITY_EQUALS
assert resolve_data_condition.comparison == DetectorPriorityLevel.OK
assert resolve_data_condition.condition_result is True
assert resolve_data_condition.condition_group == resolve_data_condition.condition_group
assert WorkflowDataConditionGroup.objects.filter(
condition_group=resolve_data_condition.condition_group
).exists()

def test_calculate_resolve_threshold_critical_only(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)

detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert).detector
detector_dcg = detector.workflow_condition_group
assert detector_dcg
resolve_threshold = get_resolve_threshold(detector_dcg)
assert resolve_threshold == self.alert_rule_trigger_critical.alert_threshold

def test_calculate_resolve_threshold_with_warning(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_conditions(self.alert_rule_trigger_warning)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)

detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert).detector
detector_dcg = detector.workflow_condition_group
assert detector_dcg
resolve_threshold = get_resolve_threshold(detector_dcg)
assert resolve_threshold == self.alert_rule_trigger_warning.alert_threshold

def create_metric_alert_trigger_auto_resolve(self):
"""
Test that we create the correct resolution DataConditions when an AlertRule has no explicit resolve threshold
"""
metric_alert = self.create_alert_rule()
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)

assert resolve_detector_trigger.type == Condition.LESS_OR_EQUAL
assert resolve_detector_trigger.comparison == critical_trigger.alert_threshold
assert resolve_detector_trigger.condition_result == DetectorPriorityLevel.OK
assert resolve_detector_trigger.condition_group == detector.workflow_condition_group

resolve_data_condition = DataCondition.objects.get(comparison=DetectorPriorityLevel.OK)

assert resolve_data_condition.type == Condition.ISSUE_PRIORITY_EQUALS
assert resolve_data_condition.condition_result is True
assert resolve_data_condition.condition_group == resolve_data_condition.condition_group
assert WorkflowDataConditionGroup.objects.filter(
condition_group=resolve_data_condition.condition_group
).exists()

def create_metric_alert_trigger_auto_resolve_less_than(self):
"""
Test that we assign the resolve detector trigger the correct type if the threshold type is ABOVE
"""
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.ABOVE)
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)

assert resolve_detector_trigger.type == Condition.GREATER_OR_EQUAL
assert resolve_detector_trigger.comparison == critical_trigger.alert_threshold
assert resolve_detector_trigger.condition_result == DetectorPriorityLevel.OK
assert resolve_detector_trigger.condition_group == detector.workflow_condition_group

def test_create_metric_alert_trigger_action(self):
"""
Test that when we call the helper methods we create all the ACI models correctly for alert rule trigger actions
"""
migrate_alert_rule(self.metric_alert, self.rpc_user)

migrate_metric_data_condition(self.alert_rule_trigger_warning)
migrate_metric_data_condition(self.alert_rule_trigger_critical)
migrate_metric_data_conditions(self.alert_rule_trigger_warning)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)

migrate_metric_action(self.alert_rule_trigger_action_email)
migrate_metric_action(self.alert_rule_trigger_action_integration)
Expand Down Expand Up @@ -286,7 +388,7 @@ def test_create_metric_alert_trigger_action_no_alert_rule_trigger_data_condition
other_alert_rule_trigger = self.create_alert_rule_trigger(alert_rule=other_metric_alert)

migrate_alert_rule(other_metric_alert, self.rpc_user)
migrate_metric_data_condition(other_alert_rule_trigger)
migrate_metric_data_conditions(other_alert_rule_trigger)
migrated_action = migrate_metric_action(self.alert_rule_trigger_action_email)
assert migrated_action is None
mock_logger.exception.assert_called_with(
Expand All @@ -306,7 +408,7 @@ def test_create_metric_alert_trigger_action_no_type(self, mock_logger):
self.integration.save()

migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_condition(self.alert_rule_trigger_critical)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)
migrated = migrate_metric_action(self.alert_rule_trigger_action_integration)
assert migrated is None
mock_logger.warning.assert_called_with(
Expand Down

0 comments on commit f675818

Please sign in to comment.