diff --git a/x-pack/plugins/monitoring/common/constants.ts b/x-pack/plugins/monitoring/common/constants.ts index 860f6439f3fdf..76d9e7517b6ab 100644 --- a/x-pack/plugins/monitoring/common/constants.ts +++ b/x-pack/plugins/monitoring/common/constants.ts @@ -236,6 +236,7 @@ export const ALERT_NODES_CHANGED = `${ALERT_PREFIX}alert_nodes_changed`; export const ALERT_ELASTICSEARCH_VERSION_MISMATCH = `${ALERT_PREFIX}alert_elasticsearch_version_mismatch`; export const ALERT_KIBANA_VERSION_MISMATCH = `${ALERT_PREFIX}alert_kibana_version_mismatch`; export const ALERT_LOGSTASH_VERSION_MISMATCH = `${ALERT_PREFIX}alert_logstash_version_mismatch`; +export const ALERT_MEMORY_USAGE = `${ALERT_PREFIX}alert_jvm_memory_usage`; export const ALERT_MISSING_MONITORING_DATA = `${ALERT_PREFIX}alert_missing_monitoring_data`; /** @@ -250,6 +251,7 @@ export const ALERTS = [ ALERT_ELASTICSEARCH_VERSION_MISMATCH, ALERT_KIBANA_VERSION_MISMATCH, ALERT_LOGSTASH_VERSION_MISMATCH, + ALERT_MEMORY_USAGE, ALERT_MISSING_MONITORING_DATA, ]; diff --git a/x-pack/plugins/monitoring/public/alerts/memory_usage_alert/index.tsx b/x-pack/plugins/monitoring/public/alerts/memory_usage_alert/index.tsx new file mode 100644 index 0000000000000..dd60967a3458b --- /dev/null +++ b/x-pack/plugins/monitoring/public/alerts/memory_usage_alert/index.tsx @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import React from 'react'; +import { validate } from '../components/duration/validation'; +import { Expression, Props } from '../components/duration/expression'; + +// eslint-disable-next-line @kbn/eslint/no-restricted-paths +import { AlertTypeModel } from '../../../../triggers_actions_ui/public/types'; + +// eslint-disable-next-line @kbn/eslint/no-restricted-paths +import { MemoryUsageAlert } from '../../../server/alerts'; + +export function createMemoryUsageAlertType(): AlertTypeModel { + return { + id: MemoryUsageAlert.TYPE, + name: MemoryUsageAlert.LABEL, + iconClass: 'bell', + alertParamsExpression: (props: Props) => ( + <Expression {...props} paramDetails={MemoryUsageAlert.PARAM_DETAILS} /> + ), + validate, + defaultActionMessage: '{{context.internalFullMessage}}', + requiresAppContext: true, + }; +} diff --git a/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js b/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js index 667f64458b8f9..13324ba3ecac9 100644 --- a/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js +++ b/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js @@ -41,6 +41,7 @@ import { ALERT_CLUSTER_HEALTH, ALERT_CPU_USAGE, ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, ALERT_NODES_CHANGED, ALERT_ELASTICSEARCH_VERSION_MISMATCH, ALERT_MISSING_MONITORING_DATA, @@ -160,6 +161,7 @@ const OVERVIEW_PANEL_ALERTS = [ALERT_CLUSTER_HEALTH, ALERT_LICENSE_EXPIRATION]; const NODES_PANEL_ALERTS = [ ALERT_CPU_USAGE, ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, ALERT_NODES_CHANGED, ALERT_ELASTICSEARCH_VERSION_MISMATCH, ALERT_MISSING_MONITORING_DATA, diff --git a/x-pack/plugins/monitoring/public/plugin.ts b/x-pack/plugins/monitoring/public/plugin.ts index f4f66185346e8..2af23f3d7b316 100644 --- a/x-pack/plugins/monitoring/public/plugin.ts +++ b/x-pack/plugins/monitoring/public/plugin.ts @@ -26,6 +26,7 @@ import { createCpuUsageAlertType } from './alerts/cpu_usage_alert'; import { 
createMissingMonitoringDataAlertType } from './alerts/missing_monitoring_data_alert'; import { createLegacyAlertTypes } from './alerts/legacy_alert'; import { createDiskUsageAlertType } from './alerts/disk_usage_alert'; +import { createMemoryUsageAlertType } from './alerts/memory_usage_alert'; interface MonitoringSetupPluginDependencies { home?: HomePublicPluginSetup; @@ -72,12 +73,15 @@ export class MonitoringPlugin }); } - plugins.triggers_actions_ui.alertTypeRegistry.register(createCpuUsageAlertType()); - plugins.triggers_actions_ui.alertTypeRegistry.register(createMissingMonitoringDataAlertType()); - plugins.triggers_actions_ui.alertTypeRegistry.register(createDiskUsageAlertType()); + const { alertTypeRegistry } = plugins.triggers_actions_ui; + alertTypeRegistry.register(createCpuUsageAlertType()); + alertTypeRegistry.register(createDiskUsageAlertType()); + alertTypeRegistry.register(createMemoryUsageAlertType()); + alertTypeRegistry.register(createMissingMonitoringDataAlertType()); + const legacyAlertTypes = createLegacyAlertTypes(); for (const legacyAlertType of legacyAlertTypes) { - plugins.triggers_actions_ui.alertTypeRegistry.register(legacyAlertType); + alertTypeRegistry.register(legacyAlertType); } const app: App = { diff --git a/x-pack/plugins/monitoring/public/views/elasticsearch/node/advanced/index.js b/x-pack/plugins/monitoring/public/views/elasticsearch/node/advanced/index.js index ff7f29c58b2f6..03c0714864f92 100644 --- a/x-pack/plugins/monitoring/public/views/elasticsearch/node/advanced/index.js +++ b/x-pack/plugins/monitoring/public/views/elasticsearch/node/advanced/index.js @@ -22,6 +22,7 @@ import { ALERT_CPU_USAGE, ALERT_MISSING_MONITORING_DATA, ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, } from '../../../../../common/constants'; function getPageData($injector) { @@ -72,7 +73,12 @@ uiRoutes.when('/elasticsearch/nodes/:node/advanced', { alerts: { shouldFetch: true, options: { - alertTypeIds: [ALERT_CPU_USAGE, ALERT_DISK_USAGE, 
ALERT_MISSING_MONITORING_DATA], + alertTypeIds: [ + ALERT_CPU_USAGE, + ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, + ALERT_MISSING_MONITORING_DATA, + ], filters: [ { nodeUuid: nodeName, diff --git a/x-pack/plugins/monitoring/public/views/elasticsearch/node/index.js b/x-pack/plugins/monitoring/public/views/elasticsearch/node/index.js index 15b9b7b4c0e4a..5164e93c266ca 100644 --- a/x-pack/plugins/monitoring/public/views/elasticsearch/node/index.js +++ b/x-pack/plugins/monitoring/public/views/elasticsearch/node/index.js @@ -23,6 +23,7 @@ import { ALERT_CPU_USAGE, ALERT_MISSING_MONITORING_DATA, ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, } from '../../../../common/constants'; uiRoutes.when('/elasticsearch/nodes/:node', { @@ -56,7 +57,12 @@ uiRoutes.when('/elasticsearch/nodes/:node', { alerts: { shouldFetch: true, options: { - alertTypeIds: [ALERT_CPU_USAGE, ALERT_DISK_USAGE, ALERT_MISSING_MONITORING_DATA], + alertTypeIds: [ + ALERT_CPU_USAGE, + ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, + ALERT_MISSING_MONITORING_DATA, + ], filters: [ { nodeUuid: nodeName, diff --git a/x-pack/plugins/monitoring/public/views/elasticsearch/nodes/index.js b/x-pack/plugins/monitoring/public/views/elasticsearch/nodes/index.js index ef807bf9b377d..4f66508c2d30f 100644 --- a/x-pack/plugins/monitoring/public/views/elasticsearch/nodes/index.js +++ b/x-pack/plugins/monitoring/public/views/elasticsearch/nodes/index.js @@ -21,6 +21,7 @@ import { ALERT_CPU_USAGE, ALERT_MISSING_MONITORING_DATA, ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, } from '../../../../common/constants'; uiRoutes.when('/elasticsearch/nodes', { @@ -88,7 +89,12 @@ uiRoutes.when('/elasticsearch/nodes', { alerts: { shouldFetch: true, options: { - alertTypeIds: [ALERT_CPU_USAGE, ALERT_DISK_USAGE, ALERT_MISSING_MONITORING_DATA], + alertTypeIds: [ + ALERT_CPU_USAGE, + ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, + ALERT_MISSING_MONITORING_DATA, + ], filters: [ { stackProduct: ELASTICSEARCH_SYSTEM_ID, diff --git 
a/x-pack/plugins/monitoring/server/alerts/alerts_factory.test.ts b/x-pack/plugins/monitoring/server/alerts/alerts_factory.test.ts index ddc8dcafebd21..f486061109b39 100644 --- a/x-pack/plugins/monitoring/server/alerts/alerts_factory.test.ts +++ b/x-pack/plugins/monitoring/server/alerts/alerts_factory.test.ts @@ -63,6 +63,6 @@ describe('AlertsFactory', () => { it('should get all', () => { const alerts = AlertsFactory.getAll(); - expect(alerts.length).toBe(9); + expect(alerts.length).toBe(10); }); }); diff --git a/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts b/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts index 05a92cea5469b..22c41c9c60038 100644 --- a/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts +++ b/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts @@ -8,6 +8,7 @@ import { CpuUsageAlert, MissingMonitoringDataAlert, DiskUsageAlert, + MemoryUsageAlert, NodesChangedAlert, ClusterHealthAlert, LicenseExpirationAlert, @@ -22,6 +23,7 @@ import { ALERT_CPU_USAGE, ALERT_MISSING_MONITORING_DATA, ALERT_DISK_USAGE, + ALERT_MEMORY_USAGE, ALERT_NODES_CHANGED, ALERT_LOGSTASH_VERSION_MISMATCH, ALERT_KIBANA_VERSION_MISMATCH, @@ -35,6 +37,7 @@ export const BY_TYPE = { [ALERT_CPU_USAGE]: CpuUsageAlert, [ALERT_MISSING_MONITORING_DATA]: MissingMonitoringDataAlert, [ALERT_DISK_USAGE]: DiskUsageAlert, + [ALERT_MEMORY_USAGE]: MemoryUsageAlert, [ALERT_NODES_CHANGED]: NodesChangedAlert, [ALERT_LOGSTASH_VERSION_MISMATCH]: LogstashVersionMismatchAlert, [ALERT_KIBANA_VERSION_MISMATCH]: KibanaVersionMismatchAlert, diff --git a/x-pack/plugins/monitoring/server/alerts/base_alert.ts b/x-pack/plugins/monitoring/server/alerts/base_alert.ts index 61486626040f7..c92291cf72093 100644 --- a/x-pack/plugins/monitoring/server/alerts/base_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/base_alert.ts @@ -377,4 +377,12 @@ export class BaseAlert { ) { throw new Error('Child classes must implement `executeActions`'); } + + protected 
createGlobalStateLink(link: string, clusterUuid: string, ccs?: string) { + const globalState = [`cluster_uuid:${clusterUuid}`]; + if (ccs) { + globalState.push(`ccs:${ccs}`); + } + return `${this.kibanaUrl}/app/monitoring#/${link}?_g=(${globalState.toString()})`; + } } diff --git a/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts b/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts index 495fe993cca1b..a53ae1f9d0dd5 100644 --- a/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts +++ b/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts @@ -78,7 +78,6 @@ describe('CpuUsageAlert', () => { }; const kibanaUrl = 'http://localhost:5601'; - const hasScheduledActions = jest.fn(); const replaceState = jest.fn(); const scheduleActions = jest.fn(); const getState = jest.fn(); @@ -87,7 +86,6 @@ describe('CpuUsageAlert', () => { callCluster: jest.fn(), alertInstanceFactory: jest.fn().mockImplementation(() => { return { - hasScheduledActions, replaceState, scheduleActions, getState, @@ -154,7 +152,7 @@ describe('CpuUsageAlert', () => { endToken: '#end_link', type: 'docLink', partialUrl: - '{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html', + '{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html', }, ], }, @@ -166,7 +164,7 @@ describe('CpuUsageAlert', () => { endToken: '#end_link', type: 'docLink', partialUrl: - '{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html', + '{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html', }, ], }, @@ -506,7 +504,7 @@ describe('CpuUsageAlert', () => { endToken: '#end_link', type: 'docLink', partialUrl: - '{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html', + '{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html', }, ], }, @@ -518,7 +516,7 @@ 
describe('CpuUsageAlert', () => { endToken: '#end_link', type: 'docLink', partialUrl: - '{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html', + '{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html', }, ], }, diff --git a/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.ts b/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.ts index ca9674c57216b..3117a160ecb62 100644 --- a/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.ts @@ -193,13 +193,13 @@ export class CpuUsageAlert extends BaseAlert { i18n.translate('xpack.monitoring.alerts.cpuUsage.ui.nextSteps.hotThreads', { defaultMessage: '#start_linkCheck hot threads#end_link', }), - `{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html` + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html` ), createLink( i18n.translate('xpack.monitoring.alerts.cpuUsage.ui.nextSteps.runningTasks', { defaultMessage: '#start_linkCheck long running tasks#end_link', }), - `{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html` + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html` ), ], tokens: [ diff --git a/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.test.ts b/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.test.ts index 546399f666b6c..e3d69820ebb05 100644 --- a/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.test.ts +++ b/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.test.ts @@ -89,7 +89,6 @@ describe('DiskUsageAlert', () => { }; const kibanaUrl = 'http://localhost:5601'; - const hasScheduledActions = jest.fn(); const replaceState = jest.fn(); const scheduleActions = jest.fn(); const getState = jest.fn(); @@ -98,7 +97,6 @@ describe('DiskUsageAlert', () => { callCluster: jest.fn(), alertInstanceFactory: 
jest.fn().mockImplementation(() => { return { - hasScheduledActions, replaceState, scheduleActions, getState, diff --git a/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.ts b/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.ts index e43dca3ce87b1..c577550de8617 100644 --- a/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/disk_usage_alert.ts @@ -109,13 +109,13 @@ export class DiskUsageAlert extends BaseAlert { protected filterAlertInstance(alertInstance: RawAlertInstance, filters: CommonAlertFilter[]) { const alertInstanceStates = alertInstance.state?.alertStates as AlertDiskUsageState[]; - const nodeUuid = filters?.find((filter) => filter.nodeUuid); + const nodeFilter = filters?.find((filter) => filter.nodeUuid); - if (!filters || !filters.length || !alertInstanceStates?.length || !nodeUuid) { + if (!filters || !filters.length || !alertInstanceStates?.length || !nodeFilter?.nodeUuid) { return true; } - const nodeAlerts = alertInstanceStates.filter(({ nodeId }) => nodeId === nodeUuid); + const nodeAlerts = alertInstanceStates.filter(({ nodeId }) => nodeId === nodeFilter.nodeUuid); return Boolean(nodeAlerts.length); } @@ -160,7 +160,7 @@ export class DiskUsageAlert extends BaseAlert { i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.tuneDisk', { defaultMessage: '#start_linkTune for disk usage#end_link', }), - `{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tune-for-disk-usage.html` + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tune-for-disk-usage.html` ), createLink( i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.identifyIndices', { @@ -173,19 +173,19 @@ export class DiskUsageAlert extends BaseAlert { i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.ilmPolicies', { defaultMessage: '#start_linkImplement ILM policies#end_link', }), - 
`{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/index-lifecycle-management.html` + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/index-lifecycle-management.html` ), createLink( i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.addMoreNodes', { defaultMessage: '#start_linkAdd more data nodes#end_link', }), - `{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/add-elasticsearch-nodes.html` + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/add-elasticsearch-nodes.html` ), createLink( i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.resizeYourDeployment', { defaultMessage: '#start_linkResize your deployment (ECE)#end_link', }), - `{elasticWebsiteUrl}/guide/en/cloud-enterprise/current/ece-resize-deployment.html` + `{elasticWebsiteUrl}guide/en/cloud-enterprise/current/ece-resize-deployment.html` ), ], tokens: [ @@ -331,7 +331,7 @@ export class DiskUsageAlert extends BaseAlert { const alertInstanceState = { alertStates: newAlertStates }; instance.replaceState(alertInstanceState); - if (newAlertStates.length && !instance.hasScheduledActions()) { + if (newAlertStates.length) { this.executeActions(instance, alertInstanceState, null, cluster); state.lastExecutedAction = currentUTC; } diff --git a/x-pack/plugins/monitoring/server/alerts/index.ts b/x-pack/plugins/monitoring/server/alerts/index.ts index 41f6daa38d1dc..48254f2dec326 100644 --- a/x-pack/plugins/monitoring/server/alerts/index.ts +++ b/x-pack/plugins/monitoring/server/alerts/index.ts @@ -8,6 +8,7 @@ export { BaseAlert } from './base_alert'; export { CpuUsageAlert } from './cpu_usage_alert'; export { MissingMonitoringDataAlert } from './missing_monitoring_data_alert'; export { DiskUsageAlert } from './disk_usage_alert'; +export { MemoryUsageAlert } from './memory_usage_alert'; export { ClusterHealthAlert } from './cluster_health_alert'; export { LicenseExpirationAlert } from './license_expiration_alert'; 
export { NodesChangedAlert } from './nodes_changed_alert'; diff --git a/x-pack/plugins/monitoring/server/alerts/memory_usage_alert.ts b/x-pack/plugins/monitoring/server/alerts/memory_usage_alert.ts new file mode 100644 index 0000000000000..8dc707afab1e1 --- /dev/null +++ b/x-pack/plugins/monitoring/server/alerts/memory_usage_alert.ts @@ -0,0 +1,355 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +import { IUiSettingsClient, Logger } from 'kibana/server'; +import { i18n } from '@kbn/i18n'; +import { BaseAlert } from './base_alert'; +import { + AlertData, + AlertCluster, + AlertState, + AlertMessage, + AlertMemoryUsageState, + AlertMessageTimeToken, + AlertMessageLinkToken, + AlertInstanceState, +} from './types'; +import { AlertInstance, AlertServices } from '../../../alerts/server'; +import { INDEX_PATTERN_ELASTICSEARCH, ALERT_MEMORY_USAGE } from '../../common/constants'; +import { fetchMemoryUsageNodeStats } from '../lib/alerts/fetch_memory_usage_node_stats'; +import { getCcsIndexPattern } from '../lib/alerts/get_ccs_index_pattern'; +import { AlertMessageTokenType, AlertSeverity, AlertParamType } from '../../common/enums'; +import { RawAlertInstance } from '../../../alerts/common'; +import { CommonAlertFilter, CommonAlertParams, CommonAlertParamDetail } from '../../common/types'; +import { AlertingDefaults, createLink } from './alerts_common'; +import { appendMetricbeatIndex } from '../lib/alerts/append_mb_index'; +import { parseDuration } from '../../../alerts/common/parse_duration'; + +interface ParamDetails { + [key: string]: CommonAlertParamDetail; +} + +export class MemoryUsageAlert extends BaseAlert { + public static readonly PARAM_DETAILS: ParamDetails = { + threshold: { + label: 
i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.threshold.label', { + defaultMessage: `Notify when memory usage is over`, + }), + type: AlertParamType.Percentage, + }, + duration: { + label: i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.duration.label', { + defaultMessage: `Look at the average over`, + }), + type: AlertParamType.Duration, + }, + }; + public static paramDetails = MemoryUsageAlert.PARAM_DETAILS; + public static readonly TYPE = ALERT_MEMORY_USAGE; + public static readonly LABEL = i18n.translate('xpack.monitoring.alerts.memoryUsage.label', { + defaultMessage: 'Memory Usage (JVM)', + }); + public type = MemoryUsageAlert.TYPE; + public label = MemoryUsageAlert.LABEL; + + protected defaultParams = { + threshold: 85, + duration: '5m', + }; + + protected actionVariables = [ + { + name: 'nodes', + description: i18n.translate('xpack.monitoring.alerts.memoryUsage.actionVariables.nodes', { + defaultMessage: 'The list of nodes reporting high memory usage.', + }), + }, + { + name: 'count', + description: i18n.translate('xpack.monitoring.alerts.memoryUsage.actionVariables.count', { + defaultMessage: 'The number of nodes reporting high memory usage.', + }), + }, + ...Object.values(AlertingDefaults.ALERT_TYPE.context), + ]; + + protected async fetchData( + params: CommonAlertParams, + callCluster: any, + clusters: AlertCluster[], + uiSettings: IUiSettingsClient, + availableCcs: string[] + ): Promise<AlertData[]> { + let esIndexPattern = appendMetricbeatIndex(this.config, INDEX_PATTERN_ELASTICSEARCH); + if (availableCcs) { + esIndexPattern = getCcsIndexPattern(esIndexPattern, availableCcs); + } + const { duration, threshold } = params; + const parsedDuration = parseDuration(duration as string); + const endMs = +new Date(); + const startMs = endMs - parsedDuration; + + const stats = await fetchMemoryUsageNodeStats( + callCluster, + clusters, + esIndexPattern, + startMs, + endMs, + this.config.ui.max_bucket_size + ); + + return stats.map((stat) 
=> { + const { clusterUuid, nodeId, memoryUsage, ccs } = stat; + return { + instanceKey: `${clusterUuid}:${nodeId}`, + shouldFire: memoryUsage > threshold, + severity: AlertSeverity.Danger, + meta: stat, + clusterUuid, + ccs, + }; + }); + } + + protected filterAlertInstance(alertInstance: RawAlertInstance, filters: CommonAlertFilter[]) { + const alertInstanceStates = alertInstance.state?.alertStates as AlertMemoryUsageState[]; + const nodeFilter = filters?.find((filter) => filter.nodeUuid); + + if (!filters || !filters.length || !alertInstanceStates?.length || !nodeFilter?.nodeUuid) { + return true; + } + + const nodeAlerts = alertInstanceStates.filter(({ nodeId }) => nodeId === nodeFilter.nodeUuid); + return Boolean(nodeAlerts.length); + } + + protected getDefaultAlertState(cluster: AlertCluster, item: AlertData): AlertState { + const currentState = super.getDefaultAlertState(cluster, item); + currentState.ui.severity = AlertSeverity.Warning; + return currentState; + } + + protected getUiMessage(alertState: AlertState, item: AlertData): AlertMessage { + const stat = item.meta as AlertMemoryUsageState; + if (!alertState.ui.isFiring) { + return { + text: i18n.translate('xpack.monitoring.alerts.memoryUsage.ui.resolvedMessage', { + defaultMessage: `The JVM memory usage on node {nodeName} is now under the threshold, currently reporting at {memoryUsage}% as of #resolved`, + values: { + nodeName: stat.nodeName, + memoryUsage: stat.memoryUsage.toFixed(2), + }, + }), + tokens: [ + { + startToken: '#resolved', + type: AlertMessageTokenType.Time, + isAbsolute: true, + isRelative: false, + timestamp: alertState.ui.resolvedMS, + } as AlertMessageTimeToken, + ], + }; + } + return { + text: i18n.translate('xpack.monitoring.alerts.memoryUsage.ui.firingMessage', { + defaultMessage: `Node #start_link{nodeName}#end_link is reporting JVM memory usage of {memoryUsage}% at #absolute`, + values: { + nodeName: stat.nodeName, + memoryUsage: stat.memoryUsage.toFixed(2), // rounded like the resolved message + }, + }), + nextSteps: [ + 
createLink( + i18n.translate('xpack.monitoring.alerts.memoryUsage.ui.nextSteps.tuneThreadPools', { + defaultMessage: '#start_linkTune thread pools#end_link', + }), + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/modules-threadpool.html` + ), + createLink( + i18n.translate('xpack.monitoring.alerts.memoryUsage.ui.nextSteps.managingHeap', { + defaultMessage: '#start_linkManaging ES Heap#end_link', + }), + `{elasticWebsiteUrl}blog/a-heap-of-trouble` + ), + createLink( + i18n.translate('xpack.monitoring.alerts.memoryUsage.ui.nextSteps.identifyIndicesShards', { + defaultMessage: '#start_linkIdentify large indices/shards#end_link', + }), + 'elasticsearch/indices', + AlertMessageTokenType.Link + ), + createLink( + i18n.translate('xpack.monitoring.alerts.memoryUsage.ui.nextSteps.addMoreNodes', { + defaultMessage: '#start_linkAdd more data nodes#end_link', + }), + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/add-elasticsearch-nodes.html` + ), + createLink( + i18n.translate('xpack.monitoring.alerts.memoryUsage.ui.nextSteps.resizeYourDeployment', { + defaultMessage: '#start_linkResize your deployment (ECE)#end_link', + }), + `{elasticWebsiteUrl}guide/en/cloud-enterprise/current/ece-resize-deployment.html` + ), + ], + tokens: [ + { + startToken: '#absolute', + type: AlertMessageTokenType.Time, + isAbsolute: true, + isRelative: false, + timestamp: alertState.ui.triggeredMS, + } as AlertMessageTimeToken, + { + startToken: '#start_link', + endToken: '#end_link', + type: AlertMessageTokenType.Link, + url: `elasticsearch/nodes/${stat.nodeId}`, + } as AlertMessageLinkToken, + ], + }; + } + + protected executeActions( + instance: AlertInstance, + { alertStates }: AlertInstanceState, + item: AlertData | null, + cluster: AlertCluster + ) { + const firingNodes = alertStates.filter( + (alertState) => alertState.ui.isFiring + ) as AlertMemoryUsageState[]; + const firingCount = firingNodes.length; + + if (firingCount > 0) { + const 
shortActionText = i18n.translate('xpack.monitoring.alerts.memoryUsage.shortAction', { + defaultMessage: 'Verify memory usage levels across affected nodes.', + }); + const fullActionText = i18n.translate('xpack.monitoring.alerts.memoryUsage.fullAction', { + defaultMessage: 'View nodes', + }); + + const ccs = alertStates.find((state) => state.ccs)?.ccs; + const globalStateLink = this.createGlobalStateLink( + 'elasticsearch/nodes', + cluster.clusterUuid, + ccs + ); + const action = `[${fullActionText}](${globalStateLink})`; + const internalShortMessage = i18n.translate( + 'xpack.monitoring.alerts.memoryUsage.firing.internalShortMessage', + { + defaultMessage: `Memory usage alert is firing for {count} node(s) in cluster: {clusterName}. {shortActionText}`, + values: { + count: firingCount, + clusterName: cluster.clusterName, + shortActionText, + }, + } + ); + const internalFullMessage = i18n.translate( + 'xpack.monitoring.alerts.memoryUsage.firing.internalFullMessage', + { + defaultMessage: `Memory usage alert is firing for {count} node(s) in cluster: {clusterName}. {action}`, + values: { + count: firingCount, + clusterName: cluster.clusterName, + action, + }, + } + ); + + instance.scheduleActions('default', { + internalShortMessage, + internalFullMessage: this.isCloud ? 
internalShortMessage : internalFullMessage, + state: AlertingDefaults.ALERT_STATE.firing, + nodes: firingNodes + .map((state) => `${state.nodeName}:${state.memoryUsage.toFixed(2)}`) + .join(','), + count: firingCount, + clusterName: cluster.clusterName, + action, + actionPlain: shortActionText, + }); + } else { + const resolvedNodes = (alertStates as AlertMemoryUsageState[]) + .filter((state) => !state.ui.isFiring) + .map((state) => `${state.nodeName}:${state.memoryUsage.toFixed(2)}`); + const resolvedCount = resolvedNodes.length; + + if (resolvedCount > 0) { + const internalMessage = i18n.translate( + 'xpack.monitoring.alerts.memoryUsage.resolved.internalMessage', + { + defaultMessage: `Memory usage alert is resolved for {count} node(s) in cluster: {clusterName}.`, + values: { + count: resolvedCount, + clusterName: cluster.clusterName, + }, + } + ); + + instance.scheduleActions('default', { + internalShortMessage: internalMessage, + internalFullMessage: internalMessage, + state: AlertingDefaults.ALERT_STATE.resolved, + nodes: resolvedNodes.join(','), + count: resolvedCount, + clusterName: cluster.clusterName, + }); + } + } + } + + protected async processData( + data: AlertData[], + clusters: AlertCluster[], + services: AlertServices, + logger: Logger, + state: any + ) { + const currentUTC = +new Date(); + for (const cluster of clusters) { + const nodes = data.filter((node) => node.clusterUuid === cluster.clusterUuid); + if (!nodes.length) { + continue; + } + + const firingNodeUuids = nodes + .filter((node) => node.shouldFire) + .map((node) => node.meta.nodeId) + .join(','); + const instanceId = `${this.type}:${cluster.clusterUuid}:${firingNodeUuids}`; + const instance = services.alertInstanceFactory(instanceId); + const newAlertStates: AlertMemoryUsageState[] = []; + + for (const node of nodes) { + const stat = node.meta as AlertMemoryUsageState; + const nodeState = this.getDefaultAlertState(cluster, node) as AlertMemoryUsageState; + nodeState.memoryUsage = 
stat.memoryUsage; + nodeState.nodeId = stat.nodeId; + nodeState.nodeName = stat.nodeName; + + if (node.shouldFire) { + nodeState.ui.triggeredMS = currentUTC; + nodeState.ui.isFiring = true; + nodeState.ui.severity = node.severity; + newAlertStates.push(nodeState); + } + nodeState.ui.message = this.getUiMessage(nodeState, node); + } + + const alertInstanceState = { alertStates: newAlertStates }; + instance.replaceState(alertInstanceState); + if (newAlertStates.length) { + this.executeActions(instance, alertInstanceState, null, cluster); + state.lastExecutedAction = currentUTC; + } + } + + state.lastChecked = currentUTC; + return state; + } +} diff --git a/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts b/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts index 4c06d9718c455..6ed237a055b5c 100644 --- a/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts +++ b/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts @@ -234,9 +234,9 @@ describe('MissingMonitoringDataAlert', () => { ], }); expect(scheduleActions).toHaveBeenCalledWith('default', { - internalFullMessage: `We have not detected any monitoring data for 2 stack product(s) in cluster: testCluster. [View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#overview?_g=(cluster_uuid:abc123))`, + internalFullMessage: `We have not detected any monitoring data for 2 stack product(s) in cluster: testCluster. [View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#/overview?_g=(cluster_uuid:abc123))`, internalShortMessage: `We have not detected any monitoring data for 2 stack product(s) in cluster: testCluster. 
Verify these stack products are up and running, then double check the monitoring settings.`, - action: `[View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#overview?_g=(cluster_uuid:abc123))`, + action: `[View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#/overview?_g=(cluster_uuid:abc123))`, actionPlain: 'Verify these stack products are up and running, then double check the monitoring settings.', clusterName, @@ -414,9 +414,9 @@ describe('MissingMonitoringDataAlert', () => { } as any); const count = 1; expect(scheduleActions).toHaveBeenCalledWith('default', { - internalFullMessage: `We have not detected any monitoring data for 1 stack product(s) in cluster: testCluster. [View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#overview?_g=(cluster_uuid:abc123,ccs:testCluster))`, + internalFullMessage: `We have not detected any monitoring data for 1 stack product(s) in cluster: testCluster. [View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#/overview?_g=(cluster_uuid:abc123,ccs:testCluster))`, internalShortMessage: `We have not detected any monitoring data for 1 stack product(s) in cluster: testCluster. 
Verify these stack products are up and running, then double check the monitoring settings.`, - action: `[View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#overview?_g=(cluster_uuid:abc123,ccs:testCluster))`, + action: `[View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#/overview?_g=(cluster_uuid:abc123,ccs:testCluster))`, actionPlain: 'Verify these stack products are up and running, then double check the monitoring settings.', clusterName, @@ -446,7 +446,7 @@ describe('MissingMonitoringDataAlert', () => { expect(scheduleActions).toHaveBeenCalledWith('default', { internalFullMessage: `We have not detected any monitoring data for 2 stack product(s) in cluster: testCluster. Verify these stack products are up and running, then double check the monitoring settings.`, internalShortMessage: `We have not detected any monitoring data for 2 stack product(s) in cluster: testCluster. Verify these stack products are up and running, then double check the monitoring settings.`, - action: `[View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#overview?_g=(cluster_uuid:abc123))`, + action: `[View what monitoring data we do have for these stack products.](http://localhost:5601/app/monitoring#/overview?_g=(cluster_uuid:abc123))`, actionPlain: 'Verify these stack products are up and running, then double check the monitoring settings.', clusterName, diff --git a/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.ts b/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.ts index 6017314f332e6..75dee475e7525 100644 --- a/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.ts @@ -309,13 +309,6 @@ export class MissingMonitoringDataAlert extends BaseAlert { return; } - const ccs = 
instanceState.alertStates.reduce((accum: string, state): string => { - if (state.ccs) { - return state.ccs; - } - return accum; - }, ''); - const firingCount = instanceState.alertStates.filter((alertState) => alertState.ui.isFiring) .length; const firingStackProducts = instanceState.alertStates @@ -336,12 +329,10 @@ export class MissingMonitoringDataAlert extends BaseAlert { const fullActionText = i18n.translate('xpack.monitoring.alerts.missingData.fullAction', { defaultMessage: 'View what monitoring data we do have for these stack products.', }); - const globalState = [`cluster_uuid:${cluster.clusterUuid}`]; - if (ccs) { - globalState.push(`ccs:${ccs}`); - } - const url = `${this.kibanaUrl}/app/monitoring#overview?_g=(${globalState.join(',')})`; - const action = `[${fullActionText}](${url})`; + + const ccs = instanceState.alertStates.find((state) => state.ccs)?.ccs; + const globalStateLink = this.createGlobalStateLink('overview', cluster.clusterUuid, ccs); + const action = `[${fullActionText}](${globalStateLink})`; const internalShortMessage = i18n.translate( 'xpack.monitoring.alerts.missingData.firing.internalShortMessage', { diff --git a/x-pack/plugins/monitoring/server/alerts/types.d.ts b/x-pack/plugins/monitoring/server/alerts/types.d.ts index 4b78bca9f47ca..0b346e770a299 100644 --- a/x-pack/plugins/monitoring/server/alerts/types.d.ts +++ b/x-pack/plugins/monitoring/server/alerts/types.d.ts @@ -22,10 +22,17 @@ export interface AlertState { ui: AlertUiState; } -export interface AlertCpuUsageState extends AlertState { - cpuUsage: number; +export interface AlertNodeState extends AlertState { nodeId: string; - nodeName: string; + nodeName?: string; +} + +export interface AlertCpuUsageState extends AlertNodeState { + cpuUsage: number; +} + +export interface AlertDiskUsageState extends AlertNodeState { + diskUsage: number; } export interface AlertMissingDataState extends AlertState { @@ -35,10 +42,8 @@ export interface AlertMissingDataState extends AlertState { 
gapDuration: number; } -export interface AlertDiskUsageState extends AlertState { - diskUsage: number; - nodeId: string; - nodeName?: string; +export interface AlertMemoryUsageState extends AlertNodeState { + memoryUsage: number; } export interface AlertUiState { @@ -81,23 +86,26 @@ export interface AlertCluster { clusterName: string; } -export interface AlertCpuUsageNodeStats { +export interface AlertNodeStats { clusterUuid: string; nodeId: string; - nodeName: string; + nodeName?: string; + ccs?: string; +} + +export interface AlertCpuUsageNodeStats extends AlertNodeStats { cpuUsage: number; containerUsage: number; containerPeriods: number; containerQuota: number; - ccs?: string; } -export interface AlertDiskUsageNodeStats { - clusterUuid: string; - nodeId: string; - nodeName: string; +export interface AlertDiskUsageNodeStats extends AlertNodeStats { diskUsage: number; - ccs?: string; +} + +export interface AlertMemoryUsageNodeStats extends AlertNodeStats { + memoryUsage: number; } export interface AlertMissingData { diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_memory_usage_node_stats.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_memory_usage_node_stats.ts new file mode 100644 index 0000000000000..c6843c3ed5f12 --- /dev/null +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_memory_usage_node_stats.ts @@ -0,0 +1,118 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
/* End of the license header that opens on the preceding line. */

// NOTE(review): `get` is no longer referenced in this module; the import is kept
// untouched here and can be dropped in a follow-up clean-up.
import { get } from 'lodash';
import { AlertCluster, AlertMemoryUsageNodeStats } from '../../alerts/types';

/**
 * Fetches the average JVM heap usage of Elasticsearch nodes from monitoring data.
 *
 * Runs a single `search` through `callCluster` against `index`, restricted to
 * `node_stats` documents of the given clusters whose `timestamp` lies between
 * `startMs` and `endMs` (both inclusive). Documents are bucketed per cluster and
 * then per node (`source_node.uuid`), and `node_stats.jvm.mem.heap_used_percent`
 * is averaged inside every node bucket.
 *
 * @param callCluster - Client function, invoked as `callCluster('search', params)`.
 * @param clusters - Clusters whose `clusterUuid` values restrict the search.
 * @param index - Index (pattern) to search.
 * @param startMs - Lower bound of the `timestamp` range, in epoch milliseconds.
 * @param endMs - Upper bound of the `timestamp` range, in epoch milliseconds.
 * @param size - Bucket limit used for both the cluster and the node terms aggregations.
 * @returns One entry per node bucket that has a numeric heap average. `memoryUsage`
 *   is that average rounded down; `ccs` is the part of the bucket's index name in
 *   front of the first `:` and is `undefined` when the name contains no `:`.
 */
export async function fetchMemoryUsageNodeStats(
  callCluster: any,
  clusters: AlertCluster[],
  index: string,
  startMs: number,
  endMs: number,
  size: number
): Promise<AlertMemoryUsageNodeStats[]> {
  const clustersIds = clusters.map((cluster) => cluster.clusterUuid);
  const params = {
    index,
    filterPath: ['aggregations'],
    body: {
      size: 0,
      query: {
        bool: {
          filter: [
            {
              terms: {
                cluster_uuid: clustersIds,
              },
            },
            {
              term: {
                type: 'node_stats',
              },
            },
            {
              range: {
                timestamp: {
                  format: 'epoch_millis',
                  gte: startMs,
                  lte: endMs,
                },
              },
            },
          ],
        },
      },
      aggs: {
        clusters: {
          terms: {
            field: 'cluster_uuid',
            size,
          },
          aggs: {
            nodes: {
              terms: {
                field: 'source_node.uuid',
                size,
              },
              aggs: {
                index: {
                  terms: {
                    field: '_index',
                    size: 1,
                  },
                },
                avg_heap: {
                  avg: {
                    field: 'node_stats.jvm.mem.heap_used_percent',
                  },
                },
                cluster_uuid: {
                  terms: {
                    field: 'cluster_uuid',
                    size: 1,
                  },
                },
                name: {
                  terms: {
                    field: 'source_node.name',
                    size: 1,
                  },
                },
              },
            },
          },
        },
      },
    },
  };

  const response = await callCluster('search', params);

  // The response may carry no `aggregations` key at all; treat that as "no data"
  // instead of throwing on a property access.
  const clusterBuckets = response?.aggregations?.clusters?.buckets ?? [];
  const stats: AlertMemoryUsageNodeStats[] = [];

  for (const clusterBucket of clusterBuckets) {
    for (const node of clusterBucket.nodes?.buckets ?? []) {
      // Validate the raw value before rounding: `Number(null)` is 0, so coercing
      // first would report a node without heap samples as 0% usage.
      const averageHeap = node.avg_heap?.value;
      if (typeof averageHeap !== 'number' || !Number.isFinite(averageHeap)) {
        continue;
      }

      const indexName: string = node.index?.buckets?.[0]?.key ?? '';
      const separatorAt = indexName.indexOf(':');

      stats.push({
        memoryUsage: Math.floor(averageHeap),
        clusterUuid: clusterBucket.key,
        nodeId: node.key,
        nodeName: node.name?.buckets?.[0]?.key,
        // `ccs` is declared as an optional string, so "absent" is undefined, not null.
        ccs: separatorAt === -1 ? undefined : indexName.slice(0, separatorAt),
      });
    }
  }

  return stats;
}