Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Warm up Orchestrator build cache from previous Composer session #6550

Merged
merged 20 commits into from
Mar 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
dce21dc
Update orch package
taicchoumsft Mar 22, 2021
ae82b46
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 22, 2021
56de6b9
initial implementation
taicchoumsft Mar 23, 2021
015aec4
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 24, 2021
1eaa774
Tighten up code and exception handling
taicchoumsft Mar 24, 2021
58b96b3
Unit tests
taicchoumsft Mar 25, 2021
bffbbc9
Fix linter errors
taicchoumsft Mar 25, 2021
eb69fab
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 25, 2021
0173319
Don't rethrow error for cache - safe to continue on
taicchoumsft Mar 25, 2021
85959bc
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 25, 2021
203694f
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 25, 2021
f7388a8
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 25, 2021
a72f752
Fix build error of worker script when testing
taicchoumsft Mar 25, 2021
5bc8fdf
Merge branch 'tachou/orchColdBootOptimization' of https://github.com/…
taicchoumsft Mar 25, 2021
63de49d
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 25, 2021
0c00e30
Merge branch 'main' into tachou/orchColdBootOptimization
boydc2014 Mar 26, 2021
a18fb55
Merge branch 'main' into tachou/orchColdBootOptimization
boydc2014 Mar 26, 2021
5d454f6
Merge branch 'main' into tachou/orchColdBootOptimization
taicchoumsft Mar 29, 2021
23395b1
Merge branch 'main' into tachou/orchColdBootOptimization
boydc2014 Mar 30, 2021
78c2946
Merge branch 'main' into tachou/orchColdBootOptimization
boydc2014 Mar 30, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Composer/packages/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,14 @@
"@bfc/intellisense-languageserver": "*",
"@bfc/lg-languageserver": "*",
"@bfc/lu-languageserver": "*",
"@bfc/shared": "*",
"@bfc/server-workers": "*",
"@bfc/shared": "*",
"@botframework-composer/types": "*",
"@microsoft/bf-dialog": "4.11.0-dev.20201025.69cf2b9",
"@microsoft/bf-dispatcher": "^4.11.0-beta.20201016.393c6b2",
"@microsoft/bf-generate-library": "^4.10.0-daily.20210225.217555",
"@microsoft/bf-lu": "4.12.0-rc0",
"@microsoft/bf-orchestrator": "4.12.0-beta.20210322.314475a",
"@microsoft/bf-orchestrator": "4.13.0-beta.20210316.e8ec340",
"applicationinsights": "^1.8.7",
"archiver": "^5.0.2",
"axios": "^0.21.1",
Expand Down
4 changes: 2 additions & 2 deletions Composer/packages/server/src/controllers/orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ async function downloadDefaultModel(req: Request, res: Response) {
const lang = req.body;

if (!isDefaultModelRequest(lang)) {
res.send(400);
res.sendStatus(400);
return;
}

Expand All @@ -62,7 +62,7 @@ async function downloadDefaultModel(req: Request, res: Response) {

if (await pathExists(modelPath)) {
state = DownloadState.ALREADYDOWNLOADED;
return res.send(201);
return res.sendStatus(201);
}

const onProgress = (msg: string) => {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { LabelResolver, Utility, Orchestrator } from '@microsoft/bf-orchestrator';
import { pathExists, readdir, readJson } from 'fs-extra';

import { cache, warmUpCache } from '../process/orchestratorWorker';

// Auto-mock the Orchestrator library; the tests below stub the calls they rely on.
jest.mock('@microsoft/bf-orchestrator');

// Swap fs-extra for an in-memory fake so the tests never touch the real file system.
// Everything is declared inside the factory because jest.mock calls are hoisted.
jest.mock('fs-extra', () => {
  // Only './generatedFolder' and any path ending in orchestrator.settings.json "exist".
  const pathExists = jest.fn(async (path) => {
    if (path === './generatedFolder') {
      return true;
    }
    return path.endsWith('orchestrator.settings.json');
  });

  // './generatedFolder' holds a mix of .lu, .blu and unrelated entries; any other folder is empty.
  const readdir = jest.fn(async (path) =>
    path === './generatedFolder' ? ['test.en.lu', 'test.en.blu', 'test.zh-cn.blu', 'settings.json', '/path'] : []
  );

  // Every settings file parses to the same settings object with an en and a multilang model.
  const readJson = jest.fn(async (file) => ({
    orchestrator: {
      models: {
        en: './model/en.onnx',
        multilang: './model/multilang.onnx',
      },
      snapshots: {
        testZhCn: './generated/test.zh-cn.blu',
      },
    },
  }));

  // Every file read yields the same fixed byte content.
  const readFile = jest.fn(async (file) => Buffer.from('test blu file'));

  return { pathExists, readdir, readJson, readFile };
});

/**
 * Unit tests for warmUpCache. fs-extra and the Orchestrator library are mocked, so each test
 * only checks which early-exit path is taken or which arguments reach
 * Orchestrator.getLabelResolversAsync.
 */
describe('Orchestrator Warmup Cache', () => {
  beforeAll(async () => {
    Utility.toPrintDebuggingLogToConsole = false; //disable Orchestrator logging
  });

  beforeEach(async () => {
    // Default stub: the Orchestrator library resolves to an empty resolver map.
    (Orchestrator.getLabelResolversAsync as jest.Mock).mockImplementation(
      async (intentModelPath: string, _: string, snapshots: Map<string, Uint8Array>) => {
        return new Map<string, LabelResolver>();
      }
    );

    // Reset the call history of every mock that is asserted on, so a call made by an earlier
    // test can never satisfy an expectation in a later one. mockClear keeps the implementations.
    (readdir as jest.Mock).mockClear();
    (pathExists as jest.Mock).mockClear();
    (readJson as jest.Mock).mockClear();
    (Orchestrator.getLabelResolversAsync as jest.Mock).mockClear();

    cache.clear();
  });

  it('exits on invalid generatedFolderPath', async () => {
    expect(await warmUpCache('badpath', 'abc')).toBeFalsy();
  });

  it('exits if cache for project has contents', async () => {
    const data: [string, LabelResolver] = ['test.en.lu', {} as LabelResolver];
    cache.set('abc', new Map([data]));
    expect(cache.get('abc').size).toBe(1);

    expect(await warmUpCache('./generatedFolder', 'abc')).toBeFalsy();
  });

  it('exits if no blu files in generated folder', async () => {
    expect(cache.get('abc').size).toBe(0);

    expect(await warmUpCache('./emptyGeneratedFolder', 'abc')).toBeFalsy();
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(0);
  });

  it('exits if Orchestrator settings is invalid', async () => {
    // The settings file parses, but not to an object with orchestrator.models.
    (readJson as jest.Mock).mockImplementationOnce(async (file) => 'corrupted settings');

    expect(await warmUpCache('./generatedFolder', 'abc')).toBeFalsy();
    // One existence check for the generated folder, one for the settings file.
    expect(pathExists).toHaveBeenCalledTimes(2);
    expect(readJson).toHaveBeenCalled();

    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(0);
  });

  it('exits if Orchestrator settings cannot be read', async () => {
    (readJson as jest.Mock).mockImplementationOnce(async (file) => undefined);

    expect(await warmUpCache('./generatedFolder', 'abc')).toBeFalsy();
    // One existence check for the generated folder, one for the settings file.
    expect(pathExists).toHaveBeenCalledTimes(2);
    expect(readJson).toHaveBeenCalled();

    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(0);
  });

  it('sends correct data shape to Orchestrator library for en + multilang', async () => {
    expect(cache.get('abc').size).toBe(0);
    expect(await readdir('./generatedFolder')).toContain('test.en.blu');

    await warmUpCache('./generatedFolder', 'abc');

    // One call per model; snapshot keys are the .blu file names with the extension swapped to .lu.
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(2);
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
      1,
      './model/en.onnx',
      '',
      new Map([['test.en.lu', new Uint8Array(Buffer.from('test blu file'))]]),
      false
    );
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
      2,
      './model/multilang.onnx',
      '',
      new Map([['test.zh-cn.lu', new Uint8Array(Buffer.from('test blu file'))]]),
      false
    );
  });

  it('sends correct data shape to Orchestrator library for en only', async () => {
    expect(cache.get('abc').size).toBe(0);

    (readdir as jest.Mock).mockImplementationOnce(async (path: string) => ['test.en.blu', 'test.en-us.blu']);

    await warmUpCache('./generatedFolder', 'abc');

    // No multilang snapshots, so only the en model is loaded.
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(1);
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
      1,
      './model/en.onnx',
      '',
      new Map([
        ['test.en-us.lu', new Uint8Array(Buffer.from('test blu file'))],
        ['test.en.lu', new Uint8Array(Buffer.from('test blu file'))],
      ]),
      false
    );
  });

  it('sends correct data shape to Orchestrator library for multilang only', async () => {
    expect(cache.get('abc').size).toBe(0);

    (readdir as jest.Mock).mockImplementationOnce(async (path: string) => ['test.zh-cn.blu', 'test.ja-jp.blu']);

    await warmUpCache('./generatedFolder', 'abc');

    // No en snapshots, so only the multilang model is loaded.
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(1);
    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
      1,
      './model/multilang.onnx',
      '',
      new Map([
        ['test.zh-cn.lu', new Uint8Array(Buffer.from('test blu file'))],
        ['test.ja-jp.lu', new Uint8Array(Buffer.from('test blu file'))],
      ]),
      false
    );
  });
});
7 changes: 7 additions & 0 deletions Composer/packages/server/src/models/bot/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ export class Builder {
setEnvDefault('LUIS_USER_AGENT', userAgent);
setEnvDefault('QNA_USER_AGENT', userAgent);

try {
//warm up the orchestrator build cache before deleting and recreating the generated folder
await orchestratorBuilder.warmupCache(this.botDir, this.generatedFolderPath);
} catch (err) {
log(err);
}

try {
await this.createGeneratedDir();
//do cross train before publish
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ class OrchestratorBuilder {
});
}

/**
 * Asks the worker process to warm up the Orchestrator cache for a project.
 *
 * A 'warmup' request tagged with a fresh unique id is sent to the worker, and the returned
 * promise's resolve/reject callbacks are stored under that id.
 * NOTE(review): presumably handleMsg settles the promise when a reply with the same id
 * arrives — confirm against the rest of the class.
 *
 * @param projectId Identifier forwarded to the worker in the request payload.
 * @param generatedFolderPath Folder path forwarded to the worker in the request payload.
 */
public async warmupCache(projectId: string, generatedFolderPath: string) {
  const requestId = uniqueId();
  const request = {
    id: requestId,
    payload: { type: 'warmup', projectId, generatedFolderPath },
  };

  return new Promise((onDone, onFail) => {
    // Register the callbacks before sending so they are in place when the reply arrives.
    this.resolves[requestId] = onDone;
    this.rejects[requestId] = onFail;
    OrchestratorBuilder.worker.send(request);
  });
}

// Handle incoming calculation result
public handleMsg(msg: ResponseMsg) {
const { id, error, payload } = msg;
Expand All @@ -57,7 +68,10 @@ class OrchestratorBuilder {
const workerScriptPath = path.join(__dirname, 'orchestratorWorker.ts');
if (fs.existsSync(workerScriptPath)) {
// set exec arguments to empty, avoid fork nodemon `--inspect` error
this._worker = fork(workerScriptPath, [], { execArgv: ['-r', 'ts-node/register'] });
this._worker = fork(workerScriptPath, [], {
execArgv: ['-r', 'ts-node/register'],
env: { TS_NODE_PROJECT: path.resolve(__dirname, '..', '..', '..', '..', 'tsconfig.json') },
});
} else {
// set exec arguments to empty, avoid fork nodemon `--inspect` error
this._worker = fork(path.join(__dirname, 'orchestratorWorker.js'), [], { execArgv: [] });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

import { FileInfo } from '@bfc/shared';
import { LabelResolver, Orchestrator } from '@microsoft/bf-orchestrator';
import { writeFile } from 'fs-extra';
import { writeFile, readdir, readFile, pathExists, readJson } from 'fs-extra';
import partition from 'lodash/partition';

import { Path } from '../../../utility/path';
import { IOrchestratorBuildOutput } from '../interface';
import { IOrchestratorBuildOutput, IOrchestratorSettings } from '../interface';

import { RequestMsg } from './types';

Expand All @@ -25,9 +26,89 @@ export class LabelResolversCache {
/** Deletes the entry stored under `projectId` from `projects`. */
public removeProject(projectId: string) {
this.projects.delete(projectId);
}

/** Empties `projects`, dropping the cached entries of every project at once. */
public clear() {
this.projects.clear();
}
}

export const cache = new LabelResolversCache();

/**
* Orchestrator: Warm up the LabelResolversCache if .blu files already exist.
*
* The Orchestrator build process is iterative - the results of every build are cached, and the cache
* is used in subsequent builds to reduce the number of utterance embeddings that have to be re-calculated.
*
* However, if a user starts a new session of Composer and reopens the same bot project,
* the caches will be empty and training will begin from scratch again.
*
* If a user has ever built a bot with Orchestrator, embeddings (in the form of .blu files) for each
* utterance will be stored in the /generated folder.
*
* We warm up the LabelResolversCache with these blu files and pass this cache to the normal build
* process. Re-hydrating the cache from files is still cheaper than recalculating the embeddings from scratch.
*
* @param generatedFolderPath Folder that is scanned for .blu files and for orchestrator.settings.json.
* @param projectId Key under which the rebuilt label resolvers are stored in the cache.
* @returns true once the cache entry for projectId has been set; false when the warm-up is skipped
* (folder missing, cache entry already non-empty, no .blu files, settings file missing, or settings
* without orchestrator.models).
*/
export async function warmUpCache(generatedFolderPath: string, projectId: string) {
//warm up the cache only if it's empty and we've built this bot before
if (!(await pathExists(generatedFolderPath)) || cache.get(projectId).size > 0) {
return false;
}

// only snapshot (.blu) files are of interest; every other entry in the folder is ignored
const bluFiles = (await readdir(generatedFolderPath)).filter((fileName) => fileName.endsWith('.blu'));

if (!bluFiles.length) {
return false;
}

const orchestratorSettingsPath = Path.resolve(generatedFolderPath, 'orchestrator.settings.json');
if (!(await pathExists(orchestratorSettingsPath))) {
return false;
}

// an implementation detail is that we need to use the right model to reproduce the right LabelResolvers
// so we get the model versions from a pre-existing settings file, and split the files based on
// language
const orchestratorSettings: IOrchestratorSettings = await readJson(orchestratorSettingsPath);
// NOTE(review): both operands of || are the same expression, so the second check is redundant -
// possibly a different field was meant to be validated here; confirm.
if (!orchestratorSettings?.orchestrator?.models || !orchestratorSettings?.orchestrator?.models) {
return false;
}

// the second dot-separated segment of the file name is treated as the locale: names whose locale
// starts with 'en' go to the en model, everything else goes to the multilang model
const [enLuFiles, multiLangLuFiles] = partition(bluFiles, (f) => f.split('.')?.[1].startsWith('en'));

const modelDatas = [
{ model: orchestratorSettings?.orchestrator?.models?.en, lang: 'en', luFiles: enLuFiles },
{ model: orchestratorSettings?.orchestrator?.models?.multilang, lang: 'multilang', luFiles: multiLangLuFiles },
];

// both models are processed concurrently; the result order matches the order of modelDatas
const [enMap, multilangMap] = await Promise.all(
modelDatas.map(async (modelData) => {
// read every snapshot into a [lu id, bytes] pair, where the id is the file name with
// '.blu' replaced by '.lu'
const snapshotData = await Promise.all(
modelData.luFiles.map(
async (f) =>
[f.replace('.blu', '.lu'), new Uint8Array(await readFile(Path.join(generatedFolderPath, f)))] as [
string,
Uint8Array
]
)
);

// the Orchestrator library is only called when a model path is configured and there is at least
// one snapshot for it; otherwise this model contributes an empty map
return modelData.model && snapshotData.length
? await Orchestrator.getLabelResolversAsync(modelData.model, '', new Map(snapshotData), false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this matches the lu object id "f.replace('.blu', '.lu')" pass in the buildAsync method, correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, these are also the ids we get back from the results object from buildAsync,

: new Map<string, LabelResolver>();
})
);

// merge the resolvers of both models into a single map stored under this project
cache.set(projectId, new Map([...enMap, ...multilangMap]));

return true;
}

const cache = new LabelResolversCache();
/**
* Orchestrator: Build command to compile .lu files into Binary LU (.blu) snapshots.
*
Expand All @@ -39,7 +120,6 @@ const cache = new LabelResolversCache();
* @param fullEmbedding - Use larger embeddings and skip size optimization (default: false)
* @returns An object containing snapshot bytes and recognizer dialogs for each .lu file
*/

export async function orchestratorBuilder(
projectId: string,
files: FileInfo[],
Expand Down Expand Up @@ -91,6 +171,12 @@ const handleMessage = async (msg: RequestMsg) => {
process.send?.({ id: msg.id, payload: snapshots });
break;
}
case 'warmup': {
const { generatedFolderPath, projectId } = payload;
const done = await warmUpCache(generatedFolderPath, projectId);
process.send?.({ id: msg.id, payload: done });
break;
}
}
} catch (error) {
return { id: msg.id, error };
Expand Down
2 changes: 1 addition & 1 deletion Composer/packages/server/src/models/bot/process/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { FileInfo } from '@bfc/shared';

export type BuildPayload = {
type: 'build';
type: 'build' | 'warmup';
projectId: string;
files: FileInfo[];
modelPath: string;
Expand Down
Loading