Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(reports): Set a minimum interval for each report's execution #28176

Merged
merged 6 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/docs/configuration/alerts-reports.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ Please refer to `ExecutorType` in the codebase for other executor types.
its default value of `http://0.0.0.0:8080/`.


It's also possible to specify a minimum interval between each report's execution through the config file:

``` python
# Set a minimum interval threshold between executions (for each Alert/Report)
# Value should be an integer number of seconds
ALERT_MINIMUM_INTERVAL = int(timedelta(minutes=10).total_seconds())
REPORT_MINIMUM_INTERVAL = int(timedelta(minutes=5).total_seconds())
```

## Custom Dockerfile

If you're running the dev version of a released Superset image, like `apache/superset:3.1.0-dev`, you should be set with the above.
Expand Down
47 changes: 46 additions & 1 deletion superset/commands/report/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import logging
from typing import Any

from croniter import croniter
from flask import current_app
from marshmallow import ValidationError

from superset.commands.base import BaseCommand
Expand All @@ -26,11 +28,12 @@
DashboardNotFoundValidationError,
DashboardNotSavedValidationError,
ReportScheduleEitherChartOrDashboardError,
ReportScheduleFrequencyNotAllowed,
ReportScheduleOnlyChartOrDashboardError,
)
from superset.daos.chart import ChartDAO
from superset.daos.dashboard import DashboardDAO
from superset.reports.models import ReportCreationMethod
from superset.reports.models import ReportCreationMethod, ReportScheduleType

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -76,3 +79,45 @@ def validate_chart_dashboard(
self._properties["dashboard"] = dashboard
elif not update:
exceptions.append(ReportScheduleEitherChartOrDashboardError())

def validate_report_frequency(
    self,
    cron_schedule: str,
    report_type: str,
) -> None:
    """
    Validates that the cron schedule never runs more often than the minimum
    interval configured in `config.py`.

    The threshold is read from `ALERT_MINIMUM_INTERVAL` for alerts and from
    `REPORT_MINIMUM_INTERVAL` for any other report type. It is compared, in
    seconds, against the gap between consecutive executions of the schedule.

    :param cron_schedule: The cron schedule configured.
    :param report_type: The report type (Alert/Report).
    :raises ReportScheduleFrequencyNotAllowed: If two consecutive executions
        are closer together than the configured minimum interval.
    """
    config_key = (
        "ALERT_MINIMUM_INTERVAL"
        if report_type == ReportScheduleType.ALERT
        else "REPORT_MINIMUM_INTERVAL"
    )
    minimum_interval = current_app.config.get(config_key, 0)

    if not isinstance(minimum_interval, int):
        # Deliberately log and skip the check instead of raising a
        # ValidationError: a misconfigured value should not block users from
        # creating alerts/reports until an admin fixes the configuration.
        # No exception is active here, so no traceback is requested.
        logger.error("Invalid value for %s: %s", config_key, minimum_interval)
        return

    # The interval is configured in seconds. Assuming standard five-field
    # cron expressions (one-minute granularity), executions are never less
    # than 60 seconds apart, so thresholds up to 60 seconds always hold.
    if minimum_interval <= 60:
        return

    # Only a bounded number of consecutive executions is sampled: 60 for
    # thresholds up to roughly one hour, 24 for larger ones.
    iterations = 60 if minimum_interval <= 3660 else 24
    schedule = croniter(cron_schedule)
    current_exec = next(schedule)

    for _ in range(iterations):
        next_exec = next(schedule)
        # Gap between two consecutive executions (presumably epoch seconds,
        # croniter's default return type).
        diff, current_exec = next_exec - current_exec, next_exec
        if int(diff) < minimum_interval:
            raise ReportScheduleFrequencyNotAllowed(
                report_type=report_type, minimum_interval=minimum_interval
            )
54 changes: 34 additions & 20 deletions superset/commands/report/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
ReportScheduleCreationMethodUniquenessValidationError,
ReportScheduleInvalidError,
ReportScheduleNameUniquenessValidationError,
ReportScheduleRequiredTypeValidationError,
)
from superset.daos.database import DatabaseDAO
from superset.daos.exceptions import DAOCreateFailedError
Expand Down Expand Up @@ -58,38 +57,53 @@ def run(self) -> ReportSchedule:
raise ReportScheduleCreateFailedError() from ex

def validate(self) -> None:
exceptions: list[ValidationError] = []
owner_ids: Optional[list[int]] = self._properties.get("owners")
name = self._properties.get("name", "")
report_type = self._properties.get("type")
creation_method = self._properties.get("creation_method")
"""
Validates the properties of a report schedule configuration, including uniqueness
of name and type, relations based on the report type, frequency, etc. Populates
a list of `ValidationErrors` to be returned in the API response if any.

Fields were loaded according to the `ReportSchedulePostSchema` schema.
"""
# Required fields
cron_schedule = self._properties["crontab"]
name = self._properties["name"]
report_type = self._properties["type"]

# Optional fields
chart_id = self._properties.get("chart")
creation_method = self._properties.get("creation_method")
dashboard_id = self._properties.get("dashboard")
owner_ids: Optional[list[int]] = self._properties.get("owners")

# Validate type is required
if not report_type:
exceptions.append(ReportScheduleRequiredTypeValidationError())
exceptions: list[ValidationError] = []

# Validate name type uniqueness
if report_type and not ReportScheduleDAO.validate_update_uniqueness(
name, report_type
):
if not ReportScheduleDAO.validate_update_uniqueness(name, report_type):
exceptions.append(
ReportScheduleNameUniquenessValidationError(
report_type=report_type, name=name
)
)

# validate relation by report type
# Validate if DB exists (for alerts)
if report_type == ReportScheduleType.ALERT:
database_id = self._properties.get("database")
if not database_id:
exceptions.append(ReportScheduleAlertRequiredDatabaseValidationError())
else:
database = DatabaseDAO.find_by_id(database_id)
if not database:
try:
database_id = self._properties["database"]
if database := DatabaseDAO.find_by_id(database_id):
self._properties["database"] = database
else:
exceptions.append(DatabaseNotFoundValidationError())
self._properties["database"] = database
except KeyError:
exceptions.append(ReportScheduleAlertRequiredDatabaseValidationError())

# validate report frequency
try:
self.validate_report_frequency(
cron_schedule,
report_type,
)
except ValidationError as exc:
exceptions.append(exc)

# Validate chart or dashboard relations
self.validate_chart_dashboard(exceptions)
Expand Down
27 changes: 27 additions & 0 deletions superset/commands/report/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.

import math

from flask_babel import lazy_gettext as _

from superset.commands.exceptions import (
Expand Down Expand Up @@ -93,6 +95,31 @@ def __init__(self) -> None:
)


class ReportScheduleFrequencyNotAllowed(ValidationError):
    """
    Marshmallow validation error raised when a report schedule is configured
    to run more often than the allowed minimum interval
    """

    def __init__(
        self,
        report_type: str = "Report",
        minimum_interval: int = 120,
    ) -> None:
        # The interval is given in seconds; the message reports it in whole
        # minutes, rounded up.
        message = _(
            "%(report_type)s schedule frequency exceeding limit."
            " Please configure a schedule with a minimum interval of"
            " %(minimum_interval)d minutes per execution.",
            report_type=report_type,
            minimum_interval=math.ceil(minimum_interval / 60),
        )
        # Attach the error to the `crontab` field of the payload.
        super().__init__(message, field_name="crontab")


class ChartNotSavedValidationError(ValidationError):
"""
Marshmallow validation error for charts that haven't been saved yet
Expand Down
69 changes: 42 additions & 27 deletions superset/commands/report/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,29 @@ def run(self) -> Model:
return report_schedule

def validate(self) -> None:
exceptions: list[ValidationError] = []
owner_ids: Optional[list[int]] = self._properties.get("owners")
report_type = self._properties.get("type", ReportScheduleType.ALERT)

name = self._properties.get("name", "")
"""
Validates the properties of a report schedule configuration, including uniqueness
of name and type, relations based on the report type, frequency, etc. Populates
a list of `ValidationErrors` to be returned in the API response if any.
Fields were loaded according to the `ReportSchedulePutSchema` schema.
"""
# Load existing report schedule config
self._model = ReportScheduleDAO.find_by_id(self._model_id)

# Does the report exist?
if not self._model:
raise ReportScheduleNotFoundError()

# Required fields for validation
cron_schedule = self._properties.get("crontab", self._model.crontab)
name = self._properties.get("name", self._model.name)
report_type = self._properties.get("type", self._model.type)

# Optional fields
database_id = self._properties.get("database")
owner_ids: Optional[list[int]] = self._properties.get("owners")

exceptions: list[ValidationError] = []

# Change the state to not triggered when the user deactivates
# A report that is currently in a working state. This prevents
# an alert/report from being kept in a working state if activated back
Expand All @@ -80,28 +92,31 @@ def validate(self) -> None:
):
self._properties["last_state"] = ReportState.NOOP

# validate relation by report type
if not report_type:
report_type = self._model.type

# Validate name type uniqueness
if not ReportScheduleDAO.validate_update_uniqueness(
name, report_type, expect_id=self._model_id
):
exceptions.append(
ReportScheduleNameUniquenessValidationError(
report_type=report_type, name=name
# Validate name/type uniqueness if either is changing
if name != self._model.name or report_type != self._model.type:
if not ReportScheduleDAO.validate_update_uniqueness(
name, report_type, expect_id=self._model_id
):
exceptions.append(
ReportScheduleNameUniquenessValidationError(
report_type=report_type, name=name
)
)
)

if report_type == ReportScheduleType.ALERT:
database_id = self._properties.get("database")
# If database_id was sent let's validate it exists
if database_id:
database = DatabaseDAO.find_by_id(database_id)
if not database:
exceptions.append(DatabaseNotFoundValidationError())
self._properties["database"] = database
# Validate if DB exists (for alerts)
if report_type == ReportScheduleType.ALERT and database_id:
if not (database := DatabaseDAO.find_by_id(database_id)):
exceptions.append(DatabaseNotFoundValidationError())
self._properties["database"] = database

# validate report frequency
try:
self.validate_report_frequency(
cron_schedule,
report_type,
)
except ValidationError as exc:
exceptions.append(exc)

# Validate chart or dashboard relations
self.validate_chart_dashboard(exceptions, update=True)
Expand Down
4 changes: 4 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,10 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument
# Custom width for screenshots
ALERT_REPORTS_MIN_CUSTOM_SCREENSHOT_WIDTH = 600
ALERT_REPORTS_MAX_CUSTOM_SCREENSHOT_WIDTH = 2400
# Set a minimum interval threshold between executions (for each Alert/Report)
# Value should be an integer number of seconds,
# e.g. int(timedelta(minutes=5).total_seconds())
ALERT_MINIMUM_INTERVAL = int(timedelta(minutes=0).total_seconds())
REPORT_MINIMUM_INTERVAL = int(timedelta(minutes=0).total_seconds())

# A custom prefix to use on all Alerts & Reports emails
EMAIL_REPORTS_SUBJECT_PREFIX = "[Report] "
Expand Down
Loading
Loading