-
Notifications
You must be signed in to change notification settings - Fork 14.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(alerts&reports): add celery soft timeout support #13436
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,8 @@ | |
from typing import Optional | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from celery.exceptions import SoftTimeLimitExceeded | ||
from flask_babel import lazy_gettext as _ | ||
|
||
from superset import jinja_context | ||
|
@@ -30,6 +32,7 @@ | |
AlertQueryInvalidTypeError, | ||
AlertQueryMultipleColumnsError, | ||
AlertQueryMultipleRowsError, | ||
AlertQueryTimeout, | ||
AlertValidatorConfigError, | ||
) | ||
|
||
|
@@ -48,6 +51,19 @@ def __init__(self, report_schedule: ReportSchedule): | |
self._result: Optional[float] = None | ||
|
||
def run(self) -> bool: | ||
""" | ||
Executes an alert SQL query and validates it. | ||
Will set the report_schedule.last_value or last_value_row_json | ||
with the query result | ||
|
||
:return: bool, if the alert triggered or not | ||
:raises: AlertQueryError, | ||
AlertQueryInvalidTypeError, | ||
AlertQueryMultipleColumnsError, | ||
AlertQueryMultipleRowsError, | ||
AlertQueryTimeout, | ||
AlertValidatorConfigError | ||
""" | ||
self.validate() | ||
|
||
if self._is_validator_not_null: | ||
|
@@ -112,9 +128,12 @@ def _is_validator_operator(self) -> bool: | |
self._report_schedule.validator_type == ReportScheduleValidatorType.OPERATOR | ||
) | ||
|
||
def validate(self) -> None: | ||
def _execute_query(self) -> pd.DataFrame: | ||
""" | ||
Validate the query result as a Pandas DataFrame | ||
Executes the actual alert SQL query template | ||
|
||
:return: A pandas dataframe | ||
:raises: AlertQueryTimeout, AlertQueryError | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
""" | ||
sql_template = jinja_context.get_template_processor( | ||
database=self._report_schedule.database | ||
|
@@ -124,10 +143,18 @@ def validate(self) -> None: | |
limited_rendered_sql = self._report_schedule.database.apply_limit_to_sql( | ||
rendered_sql, ALERT_SQL_LIMIT | ||
) | ||
df = self._report_schedule.database.get_df(limited_rendered_sql) | ||
return self._report_schedule.database.get_df(limited_rendered_sql) | ||
except SoftTimeLimitExceeded: | ||
raise AlertQueryTimeout() | ||
except Exception as ex: | ||
raise AlertQueryError(message=str(ex)) | ||
|
||
def validate(self) -> None: | ||
""" | ||
Validate the query result as a Pandas DataFrame | ||
""" | ||
df = self._execute_query() | ||
|
||
if df.empty and self._is_validator_not_null: | ||
self._result = None | ||
return | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -155,6 +155,14 @@ class AlertQueryError(CommandException): | |
message = _("Alert found an error while executing a query.") | ||
|
||
|
||
class AlertQueryTimeout(CommandException): | ||
message = _("A timeout occurred while executing the query.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As this can happen during either the query or the thumbnail computation, perhaps we should reword this as "A timeout occurred while executing the background job." - is this the same message that would trigger when a Report times out? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not the same message, this is really specific for queries There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. created a specific exception for screenshots timeout also |
||
|
||
|
||
class ReportScheduleScreenshotTimeout(CommandException): | ||
message = _("A timeout occurred while taking a screenshot.") | ||
|
||
|
||
class ReportScheduleAlertGracePeriodError(CommandException): | ||
message = _("Alert fired during grace period.") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,10 +18,11 @@ | |
from datetime import datetime, timedelta | ||
from typing import Any, List, Optional | ||
|
||
from celery.exceptions import SoftTimeLimitExceeded | ||
from flask_appbuilder.security.sqla.models import User | ||
from sqlalchemy.orm import Session | ||
|
||
from superset import app, thumbnail_cache | ||
from superset import app | ||
from superset.commands.base import BaseCommand | ||
from superset.commands.exceptions import CommandException | ||
from superset.models.reports import ( | ||
|
@@ -39,6 +40,7 @@ | |
ReportScheduleNotificationError, | ||
ReportSchedulePreviousWorkingError, | ||
ReportScheduleScreenshotFailedError, | ||
ReportScheduleScreenshotTimeout, | ||
ReportScheduleSelleniumUserNotFoundError, | ||
ReportScheduleStateNotFoundError, | ||
ReportScheduleUnexpectedError, | ||
|
@@ -172,9 +174,14 @@ def _get_screenshot(self) -> ScreenshotData: | |
) | ||
image_url = self._get_url(user_friendly=True) | ||
user = self._get_screenshot_user() | ||
image_data = screenshot.compute_and_cache( | ||
user=user, cache=thumbnail_cache, force=True, | ||
) | ||
try: | ||
image_data = screenshot.get_screenshot(user=user) | ||
except SoftTimeLimitExceeded as ex: | ||
raise ReportScheduleScreenshotTimeout() | ||
except Exception as ex: | ||
raise ReportScheduleScreenshotFailedError( | ||
f"Failed taking a screenshot {str(ex)}" | ||
) | ||
Comment on lines
+181
to
+184
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm stealing codecov's comment here, but could we add a test for this broad exception case, too? |
||
if not image_data: | ||
raise ReportScheduleScreenshotFailedError() | ||
return ScreenshotData(url=image_url, image=image_data) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: while there are many conventions out there, I believe this is the one we mostly use in Superset: