Skip to content

Commit

Permalink
feat(reports): Set a minimum interval for each report's execution (ap…
Browse files Browse the repository at this point in the history
  • Loading branch information
Vitor-Avila authored and EnxDev committed May 31, 2024
1 parent ff56520 commit 33b9418
Show file tree
Hide file tree
Showing 8 changed files with 631 additions and 49 deletions.
9 changes: 9 additions & 0 deletions docs/docs/configuration/alerts-reports.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ Please refer to `ExecutorType` in the codebase for other executor types.
its default value of `http://0.0.0.0:8080/`.


It's also possible to specify a minimum interval between each report's execution through the config file:

``` python
# Set a minimum interval threshold between executions (for each Alert/Report)
# Value should be an integer number of seconds
ALERT_MINIMUM_INTERVAL = int(timedelta(minutes=10).total_seconds())
REPORT_MINIMUM_INTERVAL = int(timedelta(minutes=5).total_seconds())
```

## Custom Dockerfile

If you're running the dev version of a released Superset image, like `apache/superset:3.1.0-dev`, you should be set with the above.
Expand Down
47 changes: 46 additions & 1 deletion superset/commands/report/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import logging
from typing import Any

from croniter import croniter
from flask import current_app
from marshmallow import ValidationError

from superset.commands.base import BaseCommand
Expand All @@ -26,11 +28,12 @@
DashboardNotFoundValidationError,
DashboardNotSavedValidationError,
ReportScheduleEitherChartOrDashboardError,
ReportScheduleFrequencyNotAllowed,
ReportScheduleOnlyChartOrDashboardError,
)
from superset.daos.chart import ChartDAO
from superset.daos.dashboard import DashboardDAO
from superset.reports.models import ReportCreationMethod
from superset.reports.models import ReportCreationMethod, ReportScheduleType

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -76,3 +79,45 @@ def validate_chart_dashboard(
self._properties["dashboard"] = dashboard
elif not update:
exceptions.append(ReportScheduleEitherChartOrDashboardError())

def validate_report_frequency(
    self,
    cron_schedule: str,
    report_type: str,
) -> None:
    """
    Validates that the scheduled frequency is not higher than the limit
    configured in `config.py`.

    The limit is read from `ALERT_MINIMUM_INTERVAL` for alerts and from
    `REPORT_MINIMUM_INTERVAL` for any other type. It is the minimum number
    of seconds allowed between two consecutive executions; a missing key
    is treated as 0 (no limit).

    :param cron_schedule: The cron schedule configured.
    :param report_type: The report type (Alert/Report).
    :raises ReportScheduleFrequencyNotAllowed: If two consecutive sampled
        executions are closer together than the configured limit.
    """
    config_key = (
        "ALERT_MINIMUM_INTERVAL"
        if report_type == ReportScheduleType.ALERT
        else "REPORT_MINIMUM_INTERVAL"
    )
    minimum_interval = current_app.config.get(config_key, 0)

    if not isinstance(minimum_interval, int):
        # A misconfigured limit must not block saving a schedule: log it
        # and skip the check. No exception is active at this point, so
        # `exc_info` is deliberately not passed.
        logger.error("Invalid value for %s: %s", config_key, minimum_interval)
        return

    # The limit is in seconds. A minute-resolution cron schedule never runs
    # more often than once every 60 seconds, so a limit of up to one minute
    # is always satisfied. Anything above that (including 61-119 seconds)
    # can be violated by an every-minute schedule and must be checked.
    if minimum_interval <= 60:
        return

    # Sample 60 consecutive executions for limits of up to 61 minutes and
    # 24 for longer limits.
    iterations = 60 if minimum_interval <= 3660 else 24
    schedule = croniter(cron_schedule)
    previous_exec = next(schedule)

    for _ in range(iterations):
        next_exec = next(schedule)
        elapsed = int(next_exec - previous_exec)
        if elapsed < minimum_interval:
            raise ReportScheduleFrequencyNotAllowed(
                report_type=report_type, minimum_interval=minimum_interval
            )
        previous_exec = next_exec
54 changes: 34 additions & 20 deletions superset/commands/report/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
ReportScheduleCreationMethodUniquenessValidationError,
ReportScheduleInvalidError,
ReportScheduleNameUniquenessValidationError,
ReportScheduleRequiredTypeValidationError,
)
from superset.daos.database import DatabaseDAO
from superset.daos.exceptions import DAOCreateFailedError
Expand Down Expand Up @@ -58,38 +57,53 @@ def run(self) -> ReportSchedule:
raise ReportScheduleCreateFailedError() from ex

def validate(self) -> None:
exceptions: list[ValidationError] = []
owner_ids: Optional[list[int]] = self._properties.get("owners")
name = self._properties.get("name", "")
report_type = self._properties.get("type")
creation_method = self._properties.get("creation_method")
"""
Validates the properties of a report schedule configuration, including uniqueness
of name and type, relations based on the report type, frequency, etc. Populates
a list of `ValidationErrors` to be returned in the API response if any.
Fields were loaded according to the `ReportSchedulePostSchema` schema.
"""
# Required fields
cron_schedule = self._properties["crontab"]
name = self._properties["name"]
report_type = self._properties["type"]

# Optional fields
chart_id = self._properties.get("chart")
creation_method = self._properties.get("creation_method")
dashboard_id = self._properties.get("dashboard")
owner_ids: Optional[list[int]] = self._properties.get("owners")

# Validate type is required
if not report_type:
exceptions.append(ReportScheduleRequiredTypeValidationError())
exceptions: list[ValidationError] = []

# Validate name type uniqueness
if report_type and not ReportScheduleDAO.validate_update_uniqueness(
name, report_type
):
if not ReportScheduleDAO.validate_update_uniqueness(name, report_type):
exceptions.append(
ReportScheduleNameUniquenessValidationError(
report_type=report_type, name=name
)
)

# validate relation by report type
# Validate if DB exists (for alerts)
if report_type == ReportScheduleType.ALERT:
database_id = self._properties.get("database")
if not database_id:
exceptions.append(ReportScheduleAlertRequiredDatabaseValidationError())
else:
database = DatabaseDAO.find_by_id(database_id)
if not database:
try:
database_id = self._properties["database"]
if database := DatabaseDAO.find_by_id(database_id):
self._properties["database"] = database
else:
exceptions.append(DatabaseNotFoundValidationError())
self._properties["database"] = database
except KeyError:
exceptions.append(ReportScheduleAlertRequiredDatabaseValidationError())

# validate report frequency
try:
self.validate_report_frequency(
cron_schedule,
report_type,
)
except ValidationError as exc:
exceptions.append(exc)

# Validate chart or dashboard relations
self.validate_chart_dashboard(exceptions)
Expand Down
27 changes: 27 additions & 0 deletions superset/commands/report/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.

import math

from flask_babel import lazy_gettext as _

from superset.commands.exceptions import (
Expand Down Expand Up @@ -93,6 +95,31 @@ def __init__(self) -> None:
)


class ReportScheduleFrequencyNotAllowed(ValidationError):
    """
    Marshmallow validation error raised when a report schedule is set to
    run more often than the configured minimum interval permits
    """

    def __init__(
        self,
        report_type: str = "Report",
        minimum_interval: int = 120,
    ) -> None:
        # `minimum_interval` is divided by 60 and rounded up so the message
        # can state the limit in whole minutes.
        minutes = math.ceil(minimum_interval / 60)
        message = _(
            "%(report_type)s schedule frequency exceeding limit."
            " Please configure a schedule with a minimum interval of"
            " %(minimum_interval)d minutes per execution.",
            report_type=report_type,
            minimum_interval=minutes,
        )
        # The error is attached to the `crontab` field.
        super().__init__(message, field_name="crontab")


class ChartNotSavedValidationError(ValidationError):
"""
Marshmallow validation error for charts that haven't been saved yet
Expand Down
69 changes: 42 additions & 27 deletions superset/commands/report/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,29 @@ def run(self) -> Model:
return report_schedule

def validate(self) -> None:
exceptions: list[ValidationError] = []
owner_ids: Optional[list[int]] = self._properties.get("owners")
report_type = self._properties.get("type", ReportScheduleType.ALERT)

name = self._properties.get("name", "")
"""
Validates the properties of a report schedule configuration, including uniqueness
of name and type, relations based on the report type, frequency, etc. Populates
a list of `ValidationErrors` to be returned in the API response if any.
Fields were loaded according to the `ReportSchedulePutSchema` schema.
"""
# Load existing report schedule config
self._model = ReportScheduleDAO.find_by_id(self._model_id)

# Does the report exist?
if not self._model:
raise ReportScheduleNotFoundError()

# Required fields for validation
cron_schedule = self._properties.get("crontab", self._model.crontab)
name = self._properties.get("name", self._model.name)
report_type = self._properties.get("type", self._model.type)

# Optional fields
database_id = self._properties.get("database")
owner_ids: Optional[list[int]] = self._properties.get("owners")

exceptions: list[ValidationError] = []

# Change the state to not triggered when the user deactivates
# A report that is currently in a working state. This prevents
# an alert/report from being kept in a working state if activated back
Expand All @@ -80,28 +92,31 @@ def validate(self) -> None:
):
self._properties["last_state"] = ReportState.NOOP

# validate relation by report type
if not report_type:
report_type = self._model.type

# Validate name type uniqueness
if not ReportScheduleDAO.validate_update_uniqueness(
name, report_type, expect_id=self._model_id
):
exceptions.append(
ReportScheduleNameUniquenessValidationError(
report_type=report_type, name=name
# Validate name/type uniqueness if either is changing
if name != self._model.name or report_type != self._model.type:
if not ReportScheduleDAO.validate_update_uniqueness(
name, report_type, expect_id=self._model_id
):
exceptions.append(
ReportScheduleNameUniquenessValidationError(
report_type=report_type, name=name
)
)
)

if report_type == ReportScheduleType.ALERT:
database_id = self._properties.get("database")
# If database_id was sent let's validate it exists
if database_id:
database = DatabaseDAO.find_by_id(database_id)
if not database:
exceptions.append(DatabaseNotFoundValidationError())
self._properties["database"] = database
# Validate if DB exists (for alerts)
if report_type == ReportScheduleType.ALERT and database_id:
if not (database := DatabaseDAO.find_by_id(database_id)):
exceptions.append(DatabaseNotFoundValidationError())
self._properties["database"] = database

# validate report frequency
try:
self.validate_report_frequency(
cron_schedule,
report_type,
)
except ValidationError as exc:
exceptions.append(exc)

# Validate chart or dashboard relations
self.validate_chart_dashboard(exceptions, update=True)
Expand Down
4 changes: 4 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,10 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument
# Custom width for screenshots
ALERT_REPORTS_MIN_CUSTOM_SCREENSHOT_WIDTH = 600
ALERT_REPORTS_MAX_CUSTOM_SCREENSHOT_WIDTH = 2400
# Set a minimum interval threshold between executions (for each Alert/Report)
# Value should be an integer number of seconds, e.g. int(timedelta(minutes=5).total_seconds())
ALERT_MINIMUM_INTERVAL = int(timedelta(minutes=0).total_seconds())
REPORT_MINIMUM_INTERVAL = int(timedelta(minutes=0).total_seconds())

# A custom prefix to use on all Alerts & Reports emails
EMAIL_REPORTS_SUBJECT_PREFIX = "[Report] "
Expand Down
Loading

0 comments on commit 33b9418

Please sign in to comment.