diff --git a/Composer/packages/server/package.json b/Composer/packages/server/package.json
index 3c0ac6a288..479c5d5912 100644
--- a/Composer/packages/server/package.json
+++ b/Composer/packages/server/package.json
@@ -75,14 +75,14 @@
     "@bfc/intellisense-languageserver": "*",
     "@bfc/lg-languageserver": "*",
     "@bfc/lu-languageserver": "*",
-    "@bfc/shared": "*",
     "@bfc/server-workers": "*",
+    "@bfc/shared": "*",
     "@botframework-composer/types": "*",
     "@microsoft/bf-dialog": "4.11.0-dev.20201025.69cf2b9",
     "@microsoft/bf-dispatcher": "^4.11.0-beta.20201016.393c6b2",
     "@microsoft/bf-generate-library": "^4.10.0-daily.20210225.217555",
     "@microsoft/bf-lu": "4.12.0-rc0",
-    "@microsoft/bf-orchestrator": "4.12.0-beta.20210322.314475a",
+    "@microsoft/bf-orchestrator": "4.13.0-beta.20210316.e8ec340",
     "applicationinsights": "^1.8.7",
     "archiver": "^5.0.2",
     "axios": "^0.21.1",
diff --git a/Composer/packages/server/src/controllers/orchestrator.ts b/Composer/packages/server/src/controllers/orchestrator.ts
index 188434637f..c6451f301b 100644
--- a/Composer/packages/server/src/controllers/orchestrator.ts
+++ b/Composer/packages/server/src/controllers/orchestrator.ts
@@ -52,7 +52,7 @@ async function downloadDefaultModel(req: Request, res: Response) {
   const lang = req.body;
 
   if (!isDefaultModelRequest(lang)) {
-    res.send(400);
+    res.sendStatus(400);
     return;
   }
 
@@ -62,7 +62,7 @@ async function downloadDefaultModel(req: Request, res: Response) {
 
   if (await pathExists(modelPath)) {
     state = DownloadState.ALREADYDOWNLOADED;
-    return res.send(201);
+    return res.sendStatus(201);
   }
 
   const onProgress = (msg: string) => {
diff --git a/Composer/packages/server/src/models/bot/__tests__/orchestratorWorker.test.ts b/Composer/packages/server/src/models/bot/__tests__/orchestratorWorker.test.ts
new file mode 100644
index 0000000000..de1680565d
--- /dev/null
+++ b/Composer/packages/server/src/models/bot/__tests__/orchestratorWorker.test.ts
@@ -0,0 +1,161 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+import { LabelResolver, Utility, Orchestrator } from '@microsoft/bf-orchestrator';
+import { pathExists, readdir, readJson } from 'fs-extra';
+
+import { cache, warmUpCache } from '../process/orchestratorWorker';
+
+jest.mock('@microsoft/bf-orchestrator');
+jest.mock('fs-extra', () => ({
+  pathExists: jest.fn(async (path) => path === './generatedFolder' || path.endsWith('orchestrator.settings.json')),
+  readdir: jest.fn(async (path) => {
+    if (path === './generatedFolder') {
+      return ['test.en.lu', 'test.en.blu', 'test.zh-cn.blu', 'settings.json', '/path'];
+    }
+    return [];
+  }),
+  readJson: jest.fn(async (file) => {
+    return {
+      orchestrator: {
+        models: {
+          en: './model/en.onnx',
+          multilang: './model/multilang.onnx',
+        },
+        snapshots: {
+          testZhCn: './generated/test.zh-cn.blu',
+        },
+      },
+    };
+  }),
+  readFile: jest.fn(async (file) => {
+    return Buffer.from('test blu file');
+  }),
+}));
+
+describe('Orchestrator Warmup Cache', () => {
+  beforeAll(async () => {
+    Utility.toPrintDebuggingLogToConsole = false; //disable Orchestrator logging
+  });
+
+  beforeEach(async () => {
+    (Orchestrator.getLabelResolversAsync as jest.Mock).mockImplementation(
+      async (intentModelPath: string, _: string, snapshots: Map<string, Uint8Array>) => {
+        return new Map();
+      }
+    );
+
+    (readdir as jest.Mock).mockClear();
+    (pathExists as jest.Mock).mockClear();
+    (readJson as jest.Mock).mockClear(); // otherwise toHaveBeenCalled() can pass on calls from earlier tests
+    (Orchestrator.getLabelResolversAsync as jest.Mock).mockClear();
+
+    cache.clear();
+  });
+
+  it('exits on invalid generatedFolderPath', async () => {
+    expect(await warmUpCache('badpath', 'abc')).toBeFalsy();
+  });
+
+  it('exits if cache for project has contents', async () => {
+    const data: [string, LabelResolver] = ['test.en.lu', {} as LabelResolver];
+    cache.set('abc', new Map([data]));
+    expect(cache.get('abc').size).toBe(1);
+
+    expect(await warmUpCache('./generatedFolder', 'abc')).toBeFalsy();
+  });
+
+  it('exits if no blu files in generated folder', async () => {
+    expect(cache.get('abc').size).toBe(0);
+
+    // the folder must exist per the pathExists mock, or warmUpCache returns before listing any files
+    (readdir as jest.Mock).mockImplementationOnce(async () => ['test.en.lu', 'settings.json']);
+
+    expect(await warmUpCache('./generatedFolder', 'abc')).toBeFalsy();
+    expect(readdir).toHaveBeenCalledTimes(1);
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(0);
+  });
+
+  it('exits if Orchestrator settings is invalid', async () => {
+    (readJson as jest.Mock).mockImplementationOnce(async (file) => 'corrupted settings');
+
+    expect(await warmUpCache('./generatedFolder', 'abc')).toBeFalsy();
+    expect(pathExists).toHaveBeenCalledTimes(2);
+    expect(readJson).toHaveBeenCalled();
+
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(0);
+  });
+
+  it('exits if Orchestrator settings cannot be read', async () => {
+    (readJson as jest.Mock).mockImplementationOnce(async (file) => undefined);
+
+    expect(await warmUpCache('./generatedFolder', 'abc')).toBeFalsy();
+    expect(pathExists).toHaveBeenCalledTimes(2);
+    expect(readJson).toHaveBeenCalled();
+
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(0);
+  });
+
+  it('sends correct data shape to Orchestrator library for en + multilang', async () => {
+    expect(cache.get('abc').size).toBe(0);
+    expect(await readdir('./generatedFolder')).toContain('test.en.blu');
+
+    await warmUpCache('./generatedFolder', 'abc');
+
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(2);
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
+      1,
+      './model/en.onnx',
+      '',
+      new Map([['test.en.lu', new Uint8Array(Buffer.from('test blu file'))]]),
+      false
+    );
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
+      2,
+      './model/multilang.onnx',
+      '',
+      new Map([['test.zh-cn.lu', new Uint8Array(Buffer.from('test blu file'))]]),
+      false
+    );
+  });
+
+  it('sends correct data shape to Orchestrator library for en only', async () => {
+    expect(cache.get('abc').size).toBe(0);
+
+    (readdir as jest.Mock).mockImplementationOnce(async (path: string) => ['test.en.blu', 'test.en-us.blu']);
+
+    await warmUpCache('./generatedFolder', 'abc');
+
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(1);
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
+      1,
+      './model/en.onnx',
+      '',
+      new Map([
+        ['test.en-us.lu', new Uint8Array(Buffer.from('test blu file'))],
+        ['test.en.lu', new Uint8Array(Buffer.from('test blu file'))],
+      ]),
+      false
+    );
+  });
+
+  it('sends correct data shape to Orchestrator library for multilang only', async () => {
+    expect(cache.get('abc').size).toBe(0);
+
+    (readdir as jest.Mock).mockImplementationOnce(async (path: string) => ['test.zh-cn.blu', 'test.ja-jp.blu']);
+
+    await warmUpCache('./generatedFolder', 'abc');
+
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenCalledTimes(1);
+    expect(Orchestrator.getLabelResolversAsync).toHaveBeenNthCalledWith(
+      1,
+      './model/multilang.onnx',
+      '',
+      new Map([
+        ['test.zh-cn.lu', new Uint8Array(Buffer.from('test blu file'))],
+        ['test.ja-jp.lu', new Uint8Array(Buffer.from('test blu file'))],
+      ]),
+      false
+    );
+  });
+});
diff --git a/Composer/packages/server/src/models/bot/builder.ts b/Composer/packages/server/src/models/bot/builder.ts
index bda9eb69fe..e606fbdbd0 100644
--- a/Composer/packages/server/src/models/bot/builder.ts
+++ b/Composer/packages/server/src/models/bot/builder.ts
@@ -101,6 +101,13 @@ export class Builder {
     setEnvDefault('LUIS_USER_AGENT', userAgent);
     setEnvDefault('QNA_USER_AGENT', userAgent);
 
+    try {
+      //warm up the orchestrator build cache before deleting and recreating the generated folder
+      await orchestratorBuilder.warmupCache(this.botDir, this.generatedFolderPath);
+    } catch (err) {
+      log(err);
+    }
+
     try {
       await this.createGeneratedDir();
       //do cross train before publish
diff --git a/Composer/packages/server/src/models/bot/process/orchestratorBuilder.ts b/Composer/packages/server/src/models/bot/process/orchestratorBuilder.ts
index 1743c3dbfb..bf8dcfcfb6 100644
---
a/Composer/packages/server/src/models/bot/process/orchestratorBuilder.ts
+++ b/Composer/packages/server/src/models/bot/process/orchestratorBuilder.ts
@@ -33,6 +33,25 @@ class OrchestratorBuilder {
     });
   }
 
+  /**
+   * Asks the worker process to warm up its label-resolver cache for a project.
+   *
+   * @param projectId - Forwarded to the worker as `payload.projectId`
+   * @param generatedFolderPath - Forwarded to the worker as `payload.generatedFolderPath`
+   * @returns A promise whose resolve/reject callbacks are stored under the message id; it stays
+   *   pending until something invokes one of them (presumably handleMsg - confirm)
+   */
+  public async warmupCache(projectId: string, generatedFolderPath: string) {
+    const msgId = uniqueId();
+    const msg = { id: msgId, payload: { type: 'warmup', projectId, generatedFolderPath } };
+
+    return new Promise((resolve, reject) => {
+      this.resolves[msgId] = resolve;
+      this.rejects[msgId] = reject;
+      OrchestratorBuilder.worker.send(msg);
+    });
+  }
+
   // Handle incoming calculation result
   public handleMsg(msg: ResponseMsg) {
     const { id, error, payload } = msg;
@@ -57,7 +76,11 @@ class OrchestratorBuilder {
       const workerScriptPath = path.join(__dirname, 'orchestratorWorker.ts');
       if (fs.existsSync(workerScriptPath)) {
         // set exec arguments to empty, avoid fork nodemon `--inspect` error
-        this._worker = fork(workerScriptPath, [], { execArgv: ['-r', 'ts-node/register'] });
+        this._worker = fork(workerScriptPath, [], {
+          execArgv: ['-r', 'ts-node/register'],
+          // a custom `env` replaces the child's whole environment, so carry process.env over explicitly
+          env: { ...process.env, TS_NODE_PROJECT: path.resolve(__dirname, '..', '..', '..', '..', 'tsconfig.json') },
+        });
       } else {
         // set exec arguments to empty, avoid fork nodemon `--inspect` error
         this._worker = fork(path.join(__dirname, 'orchestratorWorker.js'), [], { execArgv: [] });
diff --git a/Composer/packages/server/src/models/bot/process/orchestratorWorker.ts b/Composer/packages/server/src/models/bot/process/orchestratorWorker.ts
index 7760be7c39..e667725b33 100644
--- a/Composer/packages/server/src/models/bot/process/orchestratorWorker.ts
+++ b/Composer/packages/server/src/models/bot/process/orchestratorWorker.ts
@@ -3,10 +3,11 @@
 
 import { FileInfo } from '@bfc/shared';
 import { LabelResolver, Orchestrator } from '@microsoft/bf-orchestrator';
-import { writeFile } from 'fs-extra';
+import { writeFile, readdir, readFile, pathExists, readJson } from 'fs-extra';
+import partition from 'lodash/partition';
 
 import { Path } from
'../../../utility/path';
-import { IOrchestratorBuildOutput } from '../interface';
+import { IOrchestratorBuildOutput, IOrchestratorSettings } from '../interface';
 
 import { RequestMsg } from './types';
 
@@ -25,9 +26,89 @@ export class LabelResolversCache {
   public removeProject(projectId: string) {
     this.projects.delete(projectId);
   }
+
+  public clear() {
+    this.projects.clear();
+  }
+}
+
+export const cache = new LabelResolversCache();
+
+/**
+ * Orchestrator: Warm up the LabelResolversCache if .blu files already exist.
+ *
+ * The Orchestrator build process is iterative - the results of every build are cached, and the cache
+ * is used in subsequent builds to reduce the number of utterance embeddings that have to be re-calculated.
+ *
+ * However, if a user starts a new session of Composer and reopens the same bot project,
+ * the caches will be empty and training will begin from scratch again.
+ *
+ * If a user has ever built a bot with Orchestrator, embeddings (in the form of .blu files) for each
+ * utterance will be stored in the /generated folder.
+ *
+ * We warm up the LabelResolversCache with these blu files and pass this cache to the normal build
+ * process. Re-hydrating the cache from files is still cheaper than recalculating the embeddings from scratch.
+ *
+ * @param generatedFolderPath - Folder scanned for .blu snapshots and orchestrator.settings.json
+ * @param projectId - Key under which the rebuilt label resolvers are stored in the cache
+ * @returns true once the cache entry has been written; false when warm-up is skipped (folder missing,
+ *   cache already populated, no .blu files, or settings file missing / without a `models` section)
+ */
+export async function warmUpCache(generatedFolderPath: string, projectId: string) {
+  //warm up the cache only if it's empty and we've built this bot before
+  if (!(await pathExists(generatedFolderPath)) || cache.get(projectId).size > 0) {
+    return false;
+  }
+
+  const bluFiles = (await readdir(generatedFolderPath)).filter((fileName) => fileName.endsWith('.blu'));
+
+  if (!bluFiles.length) {
+    return false;
+  }
+
+  const orchestratorSettingsPath = Path.resolve(generatedFolderPath, 'orchestrator.settings.json');
+  if (!(await pathExists(orchestratorSettingsPath))) {
+    return false;
+  }
+
+  // The right model is needed to reproduce the right LabelResolvers, so the model paths are read
+  // from the pre-existing settings file and the snapshots are split by the language segment
+  // (second dot-separated part) of their file name: 'en*' goes to the en model, the rest to multilang.
+  const orchestratorSettings: IOrchestratorSettings = await readJson(orchestratorSettingsPath);
+  if (!orchestratorSettings?.orchestrator?.models) {
+    return false;
+  }
+
+  const [enLuFiles, multiLangLuFiles] = partition(bluFiles, (f) => f.split('.')[1]?.startsWith('en'));
+
+  const modelDatas = [
+    { model: orchestratorSettings?.orchestrator?.models?.en, lang: 'en', luFiles: enLuFiles },
+    { model: orchestratorSettings?.orchestrator?.models?.multilang, lang: 'multilang', luFiles: multiLangLuFiles },
+  ];
+
+  const [enMap, multilangMap] = await Promise.all(
+    modelDatas.map(async (modelData) => {
+      const snapshotData = await Promise.all(
+        modelData.luFiles.map(
+          async (f) =>
+            [f.replace(/\.blu$/, '.lu'), new Uint8Array(await readFile(Path.join(generatedFolderPath, f)))] as [
+              string,
+              Uint8Array
+            ]
+        )
+      );
+
+      return modelData.model && snapshotData.length
+        ? await Orchestrator.getLabelResolversAsync(modelData.model, '', new Map(snapshotData), false)
+        : new Map();
+    })
+  );
+
+  cache.set(projectId, new Map([...enMap, ...multilangMap]));
+
+  return true;
 }
 
-const cache = new LabelResolversCache();
 /**
  * Orchestrator: Build command to compile .lu files into Binary LU (.blu) snapshots.
  *
@@ -39,7 +120,6 @@ const cache = new LabelResolversCache();
  * @param fullEmbedding - Use larger embeddings and skip size optimization (default: false)
  * @returns An object containing snapshot bytes and recognizer dialogs for each .lu file
  */
-
 export async function orchestratorBuilder(
   projectId: string,
   files: FileInfo[],
@@ -91,6 +171,12 @@ const handleMessage = async (msg: RequestMsg) => {
         process.send?.({ id: msg.id, payload: snapshots });
         break;
       }
+      case 'warmup': {
+        const { generatedFolderPath, projectId } = payload;
+        const done = await warmUpCache(generatedFolderPath, projectId);
+        process.send?.({ id: msg.id, payload: done });
+        break;
+      }
     }
   } catch (error) {
     return { id: msg.id, error };
diff --git a/Composer/packages/server/src/models/bot/process/types.ts b/Composer/packages/server/src/models/bot/process/types.ts
index af9e9bbff8..2a5abf105c 100644
--- a/Composer/packages/server/src/models/bot/process/types.ts
+++ b/Composer/packages/server/src/models/bot/process/types.ts
@@ -3,7 +3,7 @@
 import { FileInfo } from '@bfc/shared';
 
 export type BuildPayload = {
-  type: 'build';
+  type: 'build' | 'warmup';
   projectId: string;
   files: FileInfo[];
   modelPath: string;
diff --git a/Composer/yarn.lock b/Composer/yarn.lock
index 937aa1b280..7c21dd258a 100644
--- a/Composer/yarn.lock
+++ b/Composer/yarn.lock
@@ -3947,12 +3947,12 @@
     tslib "^2.0.3"
     xml2js "^0.4.19"
 
-"@microsoft/bf-dispatcher@4.12.0-beta.20210322.314475a":
-  version "4.12.0-beta.20210322.314475a"
-  resolved "https://registry.yarnpkg.com/@microsoft/bf-dispatcher/-/bf-dispatcher-4.12.0-beta.20210322.314475a.tgz#48c2971b45cecc01461440636cac8fb74a5aac55"
-  integrity
sha512-f4LrW7fRLmB+fZXB4OR6bHbKWMAY3E7tgELhSq5fziWCX2QGsOfD7iKekFtFT1NUXFl+J02a0nQm5O8li5/Geg== +"@microsoft/bf-dispatcher@4.13.0-beta.20210316.e8ec340": + version "4.13.0-beta.20210316.e8ec340" + resolved "https://registry.yarnpkg.com/@microsoft/bf-dispatcher/-/bf-dispatcher-4.13.0-beta.20210316.e8ec340.tgz#de2024f41b217c0aa937807855ff519ea1dc370f" + integrity sha512-meLDe5MKbXdnPYBy+pXHhH6k/cdGs7zW3Vw9Powf+OfX+oBEgxo8xlfa/J1rIBsleIiGWU9reU+D6gKK/16Uyg== dependencies: - "@microsoft/bf-lu" "4.12.0-rc0" + "@microsoft/bf-lu" next "@oclif/command" "~1.5.19" "@oclif/config" "~1.13.3" argparse "~1.0.10" @@ -4057,19 +4057,19 @@ semver "^5.5.1" tslib "^2.0.3" -"@microsoft/bf-orchestrator@4.12.0-beta.20210322.314475a": - version "4.12.0-beta.20210322.314475a" - resolved "https://registry.yarnpkg.com/@microsoft/bf-orchestrator/-/bf-orchestrator-4.12.0-beta.20210322.314475a.tgz#4bc37966ac1aa144f8daae78873dc92030e2251d" - integrity sha512-DLXjmPkdJzwTvbx6vHAEz4qGDe/O/60cax/Payo9cfXRRphsbdiKTD6W2gmnrP6Q9IMitJ0yv9s527iFEI3GrQ== +"@microsoft/bf-orchestrator@4.13.0-beta.20210316.e8ec340": + version "4.13.0-beta.20210316.e8ec340" + resolved "https://registry.yarnpkg.com/@microsoft/bf-orchestrator/-/bf-orchestrator-4.13.0-beta.20210316.e8ec340.tgz#74ae2b91cedc292e19ac33d3f974d1d4c70874ad" + integrity sha512-WdWKQkOev4mX1bdk002UsD3DU9jdQms0X4c+wyawgX8XEWFJSNT42EKfKea9jbP5DtiXhwRjzeaDIOpuTgHIFw== dependencies: - "@microsoft/bf-dispatcher" "4.12.0-beta.20210322.314475a" - "@microsoft/bf-lu" "4.12.0-rc0" + "@microsoft/bf-dispatcher" "4.13.0-beta.20210316.e8ec340" + "@microsoft/bf-lu" next "@types/fs-extra" "~8.1.0" "@types/node-fetch" "~2.5.5" fast-text-encoding "^1.0.3" fs-extra "~9.0.0" node-fetch "~2.6.0" - orchestrator-core "4.12.0-beta.1" + orchestrator-core beta read-text-file "~1.1.0" tslib "^1.10.0" unzip-stream "^0.3.1" @@ -17324,10 +17324,10 @@ optionator@^0.9.1: type-check "^0.4.0" word-wrap "^1.2.3" -orchestrator-core@4.12.0-beta.1: - version "4.12.0-beta.1" - resolved 
"https://registry.yarnpkg.com/orchestrator-core/-/orchestrator-core-4.12.0-beta.1.tgz#bc7a88f48b9c185588a7d179a5da3f2e42cfe23a" - integrity sha512-4Q+Ui/6rsiBJU1fwQAvxthMNWDY/qKvhrNYLnQNQ7llN/XI8Rk3yzUxfJ04V2WAa3Dm3GEXgix6/inWaiHjKdg== +orchestrator-core@beta: + version "4.13.0-dev.20210319.e772326h" + resolved "https://registry.yarnpkg.com/orchestrator-core/-/orchestrator-core-4.13.0-dev.20210319.e772326h.tgz#eb358c8b937dc4b6b9a5bb6aba4273a62d9349cf" + integrity sha512-LFXyhZAQB+xyvmz9oWFpR/5tItKu2C4HCVez61VPsTrP+K2f7WjFoUBVuq+NQU8MPpnFJkS1wVv7nmwk9pYw9Q== dependencies: bindings "1.2.1" node-addon-api "^3.0.0"