Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] DF Analytics - auto-populate model_memory_limit #50714

Merged
4 changes: 4 additions & 0 deletions x-pack/legacy/plugins/ml/common/util/job_utils.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ export const ML_DATA_PREVIEW_COUNT: number;

export function isJobIdValid(jobId: string): boolean;

export function validateModelMemoryLimitUnits(
modelMemoryLimit: string
): { valid: boolean; messages: any[]; contains: () => boolean; find: () => void };

export function processCreatedBy(customSettings: { created_by?: string }): void;

export function mlFunctionToESAggregation(functionName: string): string | null;
9 changes: 5 additions & 4 deletions x-pack/legacy/plugins/ml/common/util/job_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,11 @@ export function basicJobValidation(job, fields, limits, skipMmlChecks = false) {

if (skipMmlChecks === false) {
// model memory limit
const mml = job.analysis_limits && job.analysis_limits.model_memory_limit;
const {
messages: mmlUnitMessages,
valid: mmlUnitValid,
} = validateModelMemoryLimitUnits(job);
} = validateModelMemoryLimitUnits(mml);

messages.push(...mmlUnitMessages);
valid = (valid && mmlUnitValid);
Expand Down Expand Up @@ -494,12 +495,12 @@ export function validateModelMemoryLimit(job, limits) {
};
}

export function validateModelMemoryLimitUnits(job) {
export function validateModelMemoryLimitUnits(modelMemoryLimit) {
const messages = [];
let valid = true;

if (typeof job.analysis_limits !== 'undefined' && typeof job.analysis_limits.model_memory_limit !== 'undefined') {
const mml = job.analysis_limits.model_memory_limit.toUpperCase();
if (modelMemoryLimit !== undefined) {
const mml = modelMemoryLimit.toUpperCase();
const mmlSplit = mml.match(/\d+(\w+)$/);
const unit = (mmlSplit && mmlSplit.length === 2) ? mmlSplit[1] : null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ describe('Data Frame Analytics: <CreateAnalyticsForm />', () => {
);

const euiFormRows = wrapper.find('EuiFormRow');
expect(euiFormRows.length).toBe(6);
expect(euiFormRows.length).toBe(7);

const row1 = euiFormRows.at(0);
expect(row1.find('label').text()).toBe('Job type');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,21 @@ import { FormattedMessage } from '@kbn/i18n/react';

import { metadata } from 'ui/metadata';
import { IndexPattern, INDEX_PATTERN_ILLEGAL_CHARACTERS } from 'ui/index_patterns';
import { ml } from '../../../../../services/ml_api_service';
import { Field, EVENT_RATE_FIELD_ID } from '../../../../../../common/types/fields';

import { newJobCapsService } from '../../../../../services/new_job_capabilities_service';
import { useKibanaContext } from '../../../../../contexts/kibana';
import { CreateAnalyticsFormProps } from '../../hooks/use_create_analytics_form';
import { JOB_TYPES } from '../../hooks/use_create_analytics_form/state';
import {
JOB_TYPES,
DEFAULT_MODEL_MEMORY_LIMIT,
getJobConfigFromFormState,
} from '../../hooks/use_create_analytics_form/state';
import { JOB_ID_MAX_LENGTH } from '../../../../../../common/constants/validation';
import { Messages } from './messages';
import { JobType } from './job_type';
import { mmlUnitInvalidErrorMessage } from '../../hooks/use_create_analytics_form/reducer';

// based on code used by `ui/index_patterns` internally
// remove the space character from the list of illegal characters
Expand Down Expand Up @@ -73,6 +79,8 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
jobIdInvalidMaxLength,
jobType,
loadingDepFieldOptions,
modelMemoryLimit,
modelMemoryLimitUnitValid,
sourceIndex,
sourceIndexNameEmpty,
sourceIndexNameValid,
Expand Down Expand Up @@ -103,6 +111,25 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
}
};

const loadModelMemoryLimitEstimate = async () => {
try {
const jobConfig = getJobConfigFromFormState(form);
delete jobConfig.dest;
Comment on lines +116 to +117
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it can be const { dest, ...jobConfig} = getJobConfigFromFormState(form);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for taking a look! 😄
I think that's a good option if mutability is a concern, but in this case jobConfig isn't relied on for anything else. I think in this case I prefer clarity of semantics and functionality by explicitly using delete.
Unless I'm missing some other benefit that this has over using delete 🤔
cc @darnautov

delete jobConfig.model_memory_limit;
const resp = await ml.dataFrameAnalytics.estimateDataFrameAnalyticsMemoryUsage(jobConfig);
setFormState({
modelMemoryLimit: resp.expected_memory_without_disk,
});
} catch (e) {
setFormState({
modelMemoryLimit:
jobType !== undefined
? DEFAULT_MODEL_MEMORY_LIMIT[jobType]
: DEFAULT_MODEL_MEMORY_LIMIT.outlier_detection,
});
}
};

const loadDependentFieldOptions = async () => {
setFormState({
loadingDepFieldOptions: true,
Expand Down Expand Up @@ -175,6 +202,21 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
}
}, [sourceIndex, jobType, sourceIndexNameEmpty]);

useEffect(() => {
const hasBasicRequiredFields =
jobType !== undefined && sourceIndex !== '' && sourceIndexNameValid === true;

const hasRequiredAnalysisFields =
(jobType === JOB_TYPES.REGRESSION &&
dependentVariable !== '' &&
trainingPercent !== undefined) ||
jobType === JOB_TYPES.OUTLIER_DETECTION;

if (hasBasicRequiredFields && hasRequiredAnalysisFields) {
loadModelMemoryLimitEstimate();
}
}, [jobType, sourceIndex, dependentVariable, trainingPercent]);

return (
<EuiForm className="mlDataFrameAnalyticsCreateForm">
<Messages messages={requestMessages} />
Expand Down Expand Up @@ -277,7 +319,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
placeholder={i18n.translate(
'xpack.ml.dataframe.analytics.create.sourceIndexPlaceholder',
{
defaultMessage: 'Choose a source index pattern or saved search.',
defaultMessage: 'Choose a source index pattern.',
}
)}
singleSelection={{ asPlainText: true }}
Expand Down Expand Up @@ -437,6 +479,24 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta
</EuiFormRow>
</Fragment>
)}
<EuiFormRow
label={i18n.translate('xpack.ml.dataframe.analytics.create.modelMemoryLimitLabel', {
defaultMessage: 'Model memory limit',
})}
helpText={!modelMemoryLimitUnitValid && mmlUnitInvalidErrorMessage}
>
<EuiFieldText
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could the default value (or the one from the endpoint) be used as a placeholder?

image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Placeholder added: f8c50cfe758da1782208983286bf40e4fc06af1f

placeholder={
jobType !== undefined
? DEFAULT_MODEL_MEMORY_LIMIT[jobType]
: DEFAULT_MODEL_MEMORY_LIMIT.outlier_detection
}
disabled={isJobCreated}
value={modelMemoryLimit || ''}
onChange={e => setFormState({ modelMemoryLimit: e.target.value })}
isInvalid={modelMemoryLimit === ''}
peteharverson marked this conversation as resolved.
Show resolved Hide resolved
/>
</EuiFormRow>
<EuiFormRow
isInvalid={createIndexPattern && destinationIndexPatternTitleExists}
error={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ jest.mock('ui/index_patterns', () => ({

type SourceIndex = DataFrameAnalyticsConfig['source']['index'];

const getMockState = (index: SourceIndex) =>
const getMockState = ({
index,
modelMemoryLimit,
}: {
index: SourceIndex;
modelMemoryLimit?: string;
}) =>
merge(getInitialState(), {
form: {
jobIdEmpty: false,
Expand All @@ -30,6 +36,7 @@ const getMockState = (index: SourceIndex) =>
source: { index },
dest: { index: 'the-destination-index' },
analysis: {},
model_memory_limit: modelMemoryLimit,
},
});

Expand Down Expand Up @@ -89,27 +96,50 @@ describe('useCreateAnalyticsForm', () => {

test('validateAdvancedEditor(): check index pattern variations', () => {
// valid single index pattern
expect(validateAdvancedEditor(getMockState('the-source-index')).isValid).toBe(true);
expect(validateAdvancedEditor(getMockState({ index: 'the-source-index' })).isValid).toBe(true);
// valid array with one ES index pattern
expect(validateAdvancedEditor(getMockState(['the-source-index'])).isValid).toBe(true);
expect(validateAdvancedEditor(getMockState({ index: ['the-source-index'] })).isValid).toBe(
true
);
// valid array with two ES index patterns
expect(
validateAdvancedEditor(getMockState(['the-source-index-1', 'the-source-index-2'])).isValid
validateAdvancedEditor(getMockState({ index: ['the-source-index-1', 'the-source-index-2'] }))
.isValid
).toBe(true);
// invalid comma-separated index pattern, this is only allowed in the simple form
// but not the advanced editor.
expect(
validateAdvancedEditor(getMockState('the-source-index-1,the-source-index-2')).isValid
validateAdvancedEditor(getMockState({ index: 'the-source-index-1,the-source-index-2' }))
.isValid
).toBe(false);
expect(
validateAdvancedEditor(
getMockState(['the-source-index-1,the-source-index-2', 'the-source-index-3'])
getMockState({ index: ['the-source-index-1,the-source-index-2', 'the-source-index-3'] })
).isValid
).toBe(false);
// invalid formats ("fake" TS casting to get valid TS and be able to run the tests)
expect(validateAdvancedEditor(getMockState({} as SourceIndex)).isValid).toBe(false);
expect(validateAdvancedEditor(getMockState({ index: {} as SourceIndex })).isValid).toBe(false);
expect(
validateAdvancedEditor(getMockState((undefined as unknown) as SourceIndex)).isValid
validateAdvancedEditor(getMockState({ index: (undefined as unknown) as SourceIndex })).isValid
).toBe(false);
});

test('validateAdvancedEditor(): check model memory limit validation', () => {
// valid model_memory_limit units
expect(
validateAdvancedEditor(getMockState({ index: 'the-source-index', modelMemoryLimit: '100mb' }))
.isValid
).toBe(true);
// invalid model_memory_limit units
expect(
validateAdvancedEditor(
getMockState({ index: 'the-source-index', modelMemoryLimit: '100bob' })
).isValid
).toBe(false);
// invalid model_memory_limit if empty
expect(
validateAdvancedEditor(getMockState({ index: 'the-source-index', modelMemoryLimit: '' }))
.isValid
).toBe(false);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,29 @@ import { isValidIndexName } from '../../../../../../common/util/es_utils';

import { Action, ACTION } from './actions';
import { getInitialState, getJobConfigFromFormState, State, JOB_TYPES } from './state';
import { isJobIdValid } from '../../../../../../common/util/job_utils';
import {
isJobIdValid,
validateModelMemoryLimitUnits,
} from '../../../../../../common/util/job_utils';
import { maxLengthValidator } from '../../../../../../common/util/validators';
import { JOB_ID_MAX_LENGTH } from '../../../../../../common/constants/validation';
import {
JOB_ID_MAX_LENGTH,
ALLOWED_DATA_UNITS,
} from '../../../../../../common/constants/validation';
import { getDependentVar, isRegressionAnalysis } from '../../../../common/analytics';

const mmlAllowedUnitsStr = `${ALLOWED_DATA_UNITS.slice(0, ALLOWED_DATA_UNITS.length - 1).join(
', '
)} or ${[...ALLOWED_DATA_UNITS].pop()}`;

export const mmlUnitInvalidErrorMessage = i18n.translate(
'xpack.ml.dataframe.analytics.create.modelMemoryUnitsInvalidError',
{
defaultMessage: 'Model memory limit data unit unrecognized. It must be {str}',
values: { str: mmlAllowedUnitsStr },
}
);

const getSourceIndexString = (state: State) => {
const { jobConfig } = state;

Expand Down Expand Up @@ -63,6 +81,12 @@ export const validateAdvancedEditor = (state: State): State => {
const destinationIndexNameValid = isValidIndexName(destinationIndexName);
const destinationIndexPatternTitleExists =
state.indexPatternsMap[destinationIndexName] !== undefined;
const mml = jobConfig.model_memory_limit;
const modelMemoryLimitEmpty = mml === '';
if (!modelMemoryLimitEmpty && mml !== undefined) {
const { valid } = validateModelMemoryLimitUnits(mml);
state.form.modelMemoryLimitUnitValid = valid;
}

let dependentVariableEmpty = false;
if (isRegressionAnalysis(jobConfig.analysis)) {
Expand Down Expand Up @@ -126,7 +150,27 @@ export const validateAdvancedEditor = (state: State): State => {
});
}

if (modelMemoryLimitEmpty) {
state.advancedEditorMessages.push({
error: i18n.translate(
'xpack.ml.dataframe.analytics.create.advancedEditorMessage.modelMemoryLimitEmpty',
{
defaultMessage: 'The model memory limit field must not be empty.',
}
),
message: '',
});
}

if (!state.form.modelMemoryLimitUnitValid) {
state.advancedEditorMessages.push({
error: mmlUnitInvalidErrorMessage,
message: '',
});
}

state.isValid =
state.form.modelMemoryLimitUnitValid &&
!jobIdEmpty &&
jobIdValid &&
!jobIdExists &&
Expand All @@ -135,6 +179,7 @@ export const validateAdvancedEditor = (state: State): State => {
!destinationIndexNameEmpty &&
destinationIndexNameValid &&
!dependentVariableEmpty &&
!modelMemoryLimitEmpty &&
(!destinationIndexPatternTitleExists || !createIndexPattern);

return state;
Expand All @@ -153,11 +198,19 @@ const validateForm = (state: State): State => {
destinationIndexPatternTitleExists,
createIndexPattern,
dependentVariable,
modelMemoryLimit,
} = state.form;

const dependentVariableEmpty = jobType === JOB_TYPES.REGRESSION && dependentVariable === '';
const modelMemoryLimitEmpty = modelMemoryLimit === '';

if (!modelMemoryLimitEmpty && modelMemoryLimit !== undefined) {
const { valid } = validateModelMemoryLimitUnits(modelMemoryLimit);
state.form.modelMemoryLimitUnitValid = valid;
}

state.isValid =
state.form.modelMemoryLimitUnitValid &&
!jobIdEmpty &&
jobIdValid &&
!jobIdExists &&
Expand All @@ -166,6 +219,7 @@ const validateForm = (state: State): State => {
!destinationIndexNameEmpty &&
destinationIndexNameValid &&
!dependentVariableEmpty &&
!modelMemoryLimitEmpty &&
(!destinationIndexPatternTitleExists || !createIndexPattern);

return state;
Expand Down
Loading