Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[data views] More efficient telemetry collection #152298

Merged
merged 14 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {
updateMax,
getIndexPatternTelemetry,
} from './register_index_pattern_usage_collection';
import { DataViewsService } from '../common';
import { SavedObjectsClient } from '@kbn/core/server';

const scriptA = 'emit(0);';
const scriptB = 'emit(1);\nemit(2);';
Expand All @@ -22,17 +22,36 @@ const scriptedFieldA = { script: scriptA };
const scriptedFieldB = { script: scriptB };
const scriptedFieldC = { script: scriptC };

/*
const runtimeFieldA = { runtimeField: { script: { source: scriptA } } };
const runtimeFieldB = { runtimeField: { script: { source: scriptB } } };
const runtimeFieldC = { runtimeField: { script: { source: scriptC } } };

const indexPatterns = {
getIds: async () => [1, 2, 3],
get: jest.fn().mockResolvedValue({
getScriptedFields: () => [],
fields: [],
*/

const runtimeFieldA = { script: { source: scriptA } };
const runtimeFieldB = { script: { source: scriptB } };
const runtimeFieldC = { script: { source: scriptC } };

let returnedSavedObjects = [
{
attributes: {
fields: '[]',
runtimeFieldMap: JSON.stringify({}),
},
},
];

const savedObjects = {
createPointInTimeFinder: jest.fn().mockReturnValue({
find: jest.fn().mockImplementation(async function* () {
yield await Promise.resolve({
total: 3,
saved_objects: returnedSavedObjects,
});
}),
close: jest.fn(),
}),
} as any as DataViewsService;
} as any as SavedObjectsClient;

describe('index pattern usage collection', () => {
it('minMaxAvgLoC calculates min, max, and average ', () => {
Expand All @@ -59,7 +78,7 @@ describe('index pattern usage collection', () => {
};

it('when there are no runtime fields or scripted fields', async () => {
expect(await getIndexPatternTelemetry(indexPatterns)).toEqual({
expect(await getIndexPatternTelemetry(savedObjects)).toEqual({
indexPatternsCount: 3,
indexPatternsWithScriptedFieldCount: 0,
indexPatternsWithRuntimeFieldCount: 0,
Expand All @@ -75,12 +94,20 @@ describe('index pattern usage collection', () => {
});

it('when there are both runtime fields or scripted fields', async () => {
indexPatterns.get = jest.fn().mockResolvedValue({
getScriptedFields: () => [scriptedFieldA, scriptedFieldB, scriptedFieldC],
fields: [runtimeFieldA, runtimeFieldB, runtimeFieldC],
});
const dataView = {
attributes: {
fields: JSON.stringify([scriptedFieldA, scriptedFieldB, scriptedFieldC]),
runtimeFieldMap: JSON.stringify({
runtimeFieldA,
runtimeFieldB,
runtimeFieldC,
}),
},
};

returnedSavedObjects = [dataView, dataView, dataView];

expect(await getIndexPatternTelemetry(indexPatterns)).toEqual({
expect(await getIndexPatternTelemetry(savedObjects)).toEqual({
indexPatternsCount: 3,
indexPatternsWithScriptedFieldCount: 3,
indexPatternsWithRuntimeFieldCount: 3,
Expand All @@ -96,12 +123,20 @@ describe('index pattern usage collection', () => {
});

it('when there are only runtime fields', async () => {
indexPatterns.get = jest.fn().mockResolvedValue({
getScriptedFields: () => [],
fields: [runtimeFieldA, runtimeFieldB, runtimeFieldC],
});
const dataView = {
attributes: {
fields: JSON.stringify([]),
runtimeFieldMap: JSON.stringify({
runtimeFieldA,
runtimeFieldB,
runtimeFieldC,
}),
},
};

returnedSavedObjects = [dataView, dataView, dataView];

expect(await getIndexPatternTelemetry(indexPatterns)).toEqual({
expect(await getIndexPatternTelemetry(savedObjects)).toEqual({
indexPatternsCount: 3,
indexPatternsWithScriptedFieldCount: 0,
indexPatternsWithRuntimeFieldCount: 3,
Expand All @@ -117,12 +152,16 @@ describe('index pattern usage collection', () => {
});

it('when there are only scripted fields', async () => {
indexPatterns.get = jest.fn().mockResolvedValue({
getScriptedFields: () => [scriptedFieldA, scriptedFieldB, scriptedFieldC],
fields: [],
});
const dataView = {
attributes: {
fields: JSON.stringify([scriptedFieldA, scriptedFieldB, scriptedFieldC]),
runtimeFieldMap: JSON.stringify({}),
},
};

returnedSavedObjects = [dataView, dataView, dataView];

expect(await getIndexPatternTelemetry(indexPatterns)).toEqual({
expect(await getIndexPatternTelemetry(savedObjects)).toEqual({
indexPatternsCount: 3,
indexPatternsWithScriptedFieldCount: 3,
indexPatternsWithRuntimeFieldCount: 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,13 @@

import { UsageCollectionSetup } from '@kbn/usage-collection-plugin/server';
import { StartServicesAccessor } from '@kbn/core/server';
import { SavedObjectsClient } from '@kbn/core/server';
import { DataViewsContract } from '../common';
import { SavedObjectsClient, SavedObjectsCreatePointInTimeFinderOptions } from '@kbn/core/server';
import {
DATA_VIEW_SAVED_OBJECT_TYPE,
DataViewAttributes,
FieldSpec,
RuntimeField,
} from '../common';
import { DataViewsServerPluginStartDependencies, DataViewsServerPluginStart } from './types';

interface CountSummary {
Expand All @@ -18,6 +23,8 @@ interface CountSummary {
avg?: number;
}

type DataViewFieldAttrs = Pick<DataViewAttributes, 'fields' | 'runtimeFieldMap'>;

interface IndexPatternUsage {
indexPatternsCount: number;
indexPatternsWithScriptedFieldCount: number;
Expand Down Expand Up @@ -57,17 +64,15 @@ export const updateMax = (currentMax: number | undefined, newVal: number): numbe
}
};

export async function getIndexPatternTelemetry(indexPatterns: DataViewsContract) {
const ids = await indexPatterns.getIds();

export async function getIndexPatternTelemetry(savedObjectsService: SavedObjectsClient) {
const countSummaryDefaults: CountSummary = {
min: undefined,
max: undefined,
avg: undefined,
};

const results = {
indexPatternsCount: ids.length,
indexPatternsCount: 0,
indexPatternsWithScriptedFieldCount: 0,
indexPatternsWithRuntimeFieldCount: 0,
scriptedFieldCount: 0,
Expand All @@ -80,60 +85,74 @@ export async function getIndexPatternTelemetry(indexPatterns: DataViewsContract)
},
};

await ids.reduce(async (col, id) => {
await col;
const ip = await indexPatterns.get(id);
const findOptions: SavedObjectsCreatePointInTimeFinderOptions = {
type: DATA_VIEW_SAVED_OBJECT_TYPE,
perPage: 1000,
fields: ['attributes.fields', 'attributes.runtimeFieldMap'],
};

const scriptedFields = ip.getScriptedFields();
const runtimeFields = ip.fields.filter((fld) => !!fld.runtimeField);
const finder = savedObjectsService.createPointInTimeFinder<DataViewFieldAttrs>(findOptions);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also call finder.close() or it's not required?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its not needed with the for loop.


if (scriptedFields.length > 0) {
// increment counts
results.indexPatternsWithScriptedFieldCount++;
results.scriptedFieldCount += scriptedFields.length;
for await (const response of finder.find()) {
const { saved_objects: savedObjects, total } = response;
results.indexPatternsCount = total;

// calc LoC
results.perIndexPattern.scriptedFieldLineCount = minMaxAvgLoC(
scriptedFields.map((fld) => fld.script || '')
);

// calc field counts
results.perIndexPattern.scriptedFieldCount.min = updateMin(
results.perIndexPattern.scriptedFieldCount.min,
scriptedFields.length
);
results.perIndexPattern.scriptedFieldCount.max = updateMax(
results.perIndexPattern.scriptedFieldCount.max,
scriptedFields.length
);
results.perIndexPattern.scriptedFieldCount.avg =
results.scriptedFieldCount / results.indexPatternsWithScriptedFieldCount;
}

if (runtimeFields.length > 0) {
// increment counts
results.indexPatternsWithRuntimeFieldCount++;
results.runtimeFieldCount += runtimeFields.length;
savedObjects.forEach((obj) => {
const fields = JSON.parse(obj.attributes.fields) || [];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the JSON.parse threw an exception locally, so it's recommendable to use atry{}catch(){} pattern for this case

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I had failed to push a change. I also changed the default data view definition in the test not to have the attributes that are used as to provide test coverage.

const runtimeFieldsMap = obj.attributes.runtimeFieldMap
? JSON.parse(obj.attributes.runtimeFieldMap) || {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also a candidate for try - catch

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mattkime Telemetry stats now work as excepted 👍 ... one think I still think worth to consider is using a try - catch pattern for the JSON.parse. Because if for whatever reason an invalid JSON string is passed in, a SyntaxError is thrown, and this would lead and the one hostile data view would stop the generation of the results ... or do I miss somewhere this was caught? ... or would such a case also lead to troubles before this change?

: {};
const scriptedFields: FieldSpec[] = fields.filter((fld: FieldSpec) => !!fld.script);
// to array
const runtimeFields: RuntimeField[] = Object.values(runtimeFieldsMap);

// calc LoC
const runtimeFieldScripts = runtimeFields.map(
(fld) => fld.runtimeField?.script?.source || ''
);
results.perIndexPattern.runtimeFieldLineCount = minMaxAvgLoC(runtimeFieldScripts);

// calc field counts
results.perIndexPattern.runtimeFieldCount.min = updateMin(
results.perIndexPattern.runtimeFieldCount.min,
runtimeFields.length
);
results.perIndexPattern.runtimeFieldCount.max = updateMax(
results.perIndexPattern.runtimeFieldCount.max,
runtimeFields.length
);
results.perIndexPattern.runtimeFieldCount.avg =
results.runtimeFieldCount / results.indexPatternsWithRuntimeFieldCount;
}
}, Promise.resolve());
if (scriptedFields.length > 0) {
// increment counts
results.indexPatternsWithScriptedFieldCount++;
results.scriptedFieldCount += scriptedFields.length;

// calc LoC
results.perIndexPattern.scriptedFieldLineCount = minMaxAvgLoC(
scriptedFields.map((fld) => fld.script || '')
);

// calc field counts
results.perIndexPattern.scriptedFieldCount.min = updateMin(
results.perIndexPattern.scriptedFieldCount.min,
scriptedFields.length
);
results.perIndexPattern.scriptedFieldCount.max = updateMax(
results.perIndexPattern.scriptedFieldCount.max,
scriptedFields.length
);
results.perIndexPattern.scriptedFieldCount.avg =
results.scriptedFieldCount / results.indexPatternsWithScriptedFieldCount;
}

if (runtimeFields.length > 0) {
// increment counts
results.indexPatternsWithRuntimeFieldCount++;
results.runtimeFieldCount += runtimeFields.length;

// calc LoC
const runtimeFieldScripts = runtimeFields.map((fld) => fld.script?.source || '');
results.perIndexPattern.runtimeFieldLineCount = minMaxAvgLoC(runtimeFieldScripts);

// calc field counts
results.perIndexPattern.runtimeFieldCount.min = updateMin(
results.perIndexPattern.runtimeFieldCount.min,
runtimeFields.length
);
results.perIndexPattern.runtimeFieldCount.max = updateMax(
results.perIndexPattern.runtimeFieldCount.max,
runtimeFields.length
);
results.perIndexPattern.runtimeFieldCount.avg =
results.runtimeFieldCount / results.indexPatternsWithRuntimeFieldCount;
}
});
}

return results;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

console.log here to see results. results are encrypted in the http response. You may need to restart kibana to run this multiple times.

}
Expand All @@ -153,14 +172,10 @@ export function registerIndexPatternsUsageCollector(
type: 'index-patterns',
isReady: () => true,
fetch: async () => {
const [{ savedObjects, elasticsearch }, , { dataViewsServiceFactory }] =
await getStartServices();
const indexPatternService = await dataViewsServiceFactory(
new SavedObjectsClient(savedObjects.createInternalRepository()),
elasticsearch.client.asInternalUser
);

return await getIndexPatternTelemetry(indexPatternService);
const [{ savedObjects }] = await getStartServices();

const savedObjectsService = new SavedObjectsClient(savedObjects.createInternalRepository());
return await getIndexPatternTelemetry(savedObjectsService);
},
schema: {
indexPatternsCount: { type: 'long' },
Expand Down