Skip to content

Commit

Permalink
[ML] Delayed data test for Anomaly Detection jobs health rule type (#107183)
Browse files Browse the repository at this point in the history

* [ML] enable test selection

* [ML] executor update for annotations

* [ML] update unit tests

* [ML] fix i18n

* [ML] update schema

* [ML] fix ts

* [ML] account for docs count, update unit tests

* [ML] update translation strings

* [ML] add types

* [ML] fetch the latest annotation sorted by modified_time

* [ML] getDelayedDataAnnotations

* [ML] update unit tests

* [ML] set default number of docs to 1, update schema validation

* [ML] getDelayedDataLookbackTimestamp

* [ML] filter null values, update unit tests

* [ML] account for query delay, refactor with memoize

* [ML] update unit test

* [ML] remove previousStartedAt

* [ML] filter based on the job config

* [ML] fix tests

* [ML] add maps

* [ML] combine filters

* [ML] move range query inside of a filter

* [ML] filter out jobs with missing datafeed

* [ML] resolveLookbackInterval only from jobs with datafeeds

* [ML] do not show an error on empty time interval

* [ML] add help tooltips

* [ML] update description for the datafeed check
  • Loading branch information
darnautov authored Aug 5, 2021
1 parent af3a976 commit 39bd188
Show file tree
Hide file tree
Showing 15 changed files with 595 additions and 112 deletions.
71 changes: 51 additions & 20 deletions x-pack/plugins/ml/common/constants/alerts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,57 @@ export const TOP_N_BUCKETS_COUNT = 1;

export const ALL_JOBS_SELECTION = '*';

export const HEALTH_CHECK_NAMES: Record<JobsHealthTests, string> = {
datafeed: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedCheckName', {
defaultMessage: 'Datafeed is not started',
}),
mml: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.mmlCheckName', {
defaultMessage: 'Model memory limit reached',
}),
errorMessages: i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.errorMessagesCheckName',
{
export const HEALTH_CHECK_NAMES: Record<JobsHealthTests, { name: string; description: string }> = {
datafeed: {
name: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedCheckName', {
defaultMessage: 'Datafeed is not started',
}),
description: i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.datafeedCheckDescription',
{
defaultMessage: 'Get alerted if the corresponding datafeed of the job is not started',
}
),
},
mml: {
name: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.mmlCheckName', {
defaultMessage: 'Model memory limit reached',
}),
description: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.mmlCheckDescription', {
defaultMessage: 'Get alerted when job reaches soft or hard model memory limit.',
}),
},
delayedData: {
name: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.delayedDataCheckName', {
defaultMessage: 'Data delay has occurred',
}),
description: i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.delayedDataCheckDescription',
{
defaultMessage: 'Get alerted if a job missed data due to data delay.',
}
),
},
errorMessages: {
name: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.errorMessagesCheckName', {
defaultMessage: 'There are errors in the job messages',
}
),
behindRealtime: i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.behindRealtimeCheckName',
{
}),
description: i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.errorMessagesCheckDescription',
{
defaultMessage: 'There are errors in the job messages',
}
),
},
behindRealtime: {
name: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.behindRealtimeCheckName', {
defaultMessage: 'Job is running behind real-time',
}
),
delayedData: i18n.translate('xpack.ml.alertTypes.jobsHealthAlertingRule.delayedDataCheckName', {
defaultMessage: 'Data delay has occurred',
}),
}),
description: i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.behindRealtimeCheckDescription',
{
defaultMessage: 'Job is running behind real-time',
}
),
},
};
2 changes: 1 addition & 1 deletion x-pack/plugins/ml/common/types/alerts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ export interface JobAlertingRuleStats {
alerting_rules?: MlAnomalyDetectionAlertRule[];
}

interface CommonHealthCheckConfig {
export interface CommonHealthCheckConfig {
enabled: boolean;
}

Expand Down
7 changes: 6 additions & 1 deletion x-pack/plugins/ml/common/types/annotations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,12 @@ export interface Annotation {
annotation: string;
job_id: string;
type: ANNOTATION_TYPE.ANNOTATION | ANNOTATION_TYPE.COMMENT;
event?: string;
event?:
| 'user'
| 'delayed_data'
| 'model_snapshot_stored'
| 'model_change'
| 'categorization_status_change';
detector_index?: number;
partition_field_name?: string;
partition_field_value?: string;
Expand Down
14 changes: 14 additions & 0 deletions x-pack/plugins/ml/common/util/alerts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,22 @@ describe('getResultJobsHealthRuleConfig', () => {
mml: {
enabled: true,
},
delayedData: {
docsCount: 1,
enabled: true,
timeInterval: null,
},
});
});
test('returns config with overridden values based on provided configuration', () => {
expect(
getResultJobsHealthRuleConfig({
mml: { enabled: false },
errorMessages: { enabled: true },
delayedData: {
enabled: true,
docsCount: 1,
},
})
).toEqual({
datafeed: {
Expand All @@ -105,6 +114,11 @@ describe('getResultJobsHealthRuleConfig', () => {
mml: {
enabled: false,
},
delayedData: {
docsCount: 1,
enabled: true,
timeInterval: null,
},
});
});
});
4 changes: 3 additions & 1 deletion x-pack/plugins/ml/common/util/alerts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ export function getTopNBuckets(job: Job): number {
return Math.ceil(narrowBucketLength / bucketSpan.asSeconds());
}

const implementedTests = ['datafeed', 'mml'] as JobsHealthTests[];
const implementedTests = ['datafeed', 'mml', 'delayedData'] as JobsHealthTests[];

/**
* Returns tests configuration combined with default values.
Expand All @@ -70,6 +70,8 @@ export function getResultJobsHealthRuleConfig(config: JobsHealthRuleTestsConfig)
},
delayedData: {
enabled: config?.delayedData?.enabled ?? true,
docsCount: config?.delayedData?.docsCount ?? 1,
timeInterval: config?.delayedData?.timeInterval ?? null,
},
behindRealtime: {
enabled: config?.behindRealtime?.enabled ?? true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { PluginSetupContract as AlertingSetup } from '../../../../alerting/publi
import { ML_ALERT_TYPES } from '../../../common/constants/alerts';
import { MlAnomalyDetectionJobsHealthRuleParams } from '../../../common/types/alerts';
import { getResultJobsHealthRuleConfig } from '../../../common/util/alerts';
import { validateLookbackInterval } from '../validators';

export function registerJobsHealthAlertingRule(
triggersActionsUi: TriggersAndActionsUIPublicPluginSetup,
Expand All @@ -32,6 +33,7 @@ export function registerJobsHealthAlertingRule(
errors: {
includeJobs: new Array<string>(),
testsConfig: new Array<string>(),
delayedData: new Array<string>(),
} as Record<keyof MlAnomalyDetectionJobsHealthRuleParams, string[]>,
};

Expand All @@ -53,6 +55,31 @@ export function registerJobsHealthAlertingRule(
);
}

if (
!!resultTestConfig.delayedData.timeInterval &&
validateLookbackInterval(resultTestConfig.delayedData.timeInterval)
) {
validationResult.errors.delayedData.push(
i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.testsConfig.delayedData.timeIntervalErrorMessage',
{
defaultMessage: 'Invalid time interval',
}
)
);
}

if (resultTestConfig.delayedData.docsCount === 0) {
validationResult.errors.delayedData.push(
i18n.translate(
'xpack.ml.alertTypes.jobsHealthAlertingRule.testsConfig.delayedData.docsCountErrorMessage',
{
defaultMessage: 'Invalid number of documents',
}
)
);
}

return validationResult;
},
requiresAppContext: false,
Expand All @@ -68,6 +95,9 @@ export function registerJobsHealthAlertingRule(
\\{\\{#memory_status\\}\\}Memory status: \\{\\{memory_status\\}\\} \\{\\{/memory_status\\}\\}
\\{\\{#log_time\\}\\}Memory logging time: \\{\\{log_time\\}\\} \\{\\{/log_time\\}\\}
\\{\\{#failed_category_count\\}\\}Failed category count: \\{\\{failed_category_count\\}\\} \\{\\{/failed_category_count\\}\\}
\\{\\{#annotation\\}\\}Annotation: \\{\\{annotation\\}\\} \\{\\{/annotation\\}\\}
\\{\\{#missed_docs_count\\}\\}Number of missed documents: \\{\\{missed_docs_count\\}\\} \\{\\{/missed_docs_count\\}\\}
\\{\\{#end_timestamp\\}\\}Latest finalized bucket with missing docs: \\{\\{end_timestamp\\}\\} \\{\\{/end_timestamp\\}\\}
\\{\\{/context.results\\}\\}
`,
}
Expand Down
Loading

0 comments on commit 39bd188

Please sign in to comment.