Skip to content
This repository has been archived by the owner on Jan 15, 2025. It is now read-only.

LU improvements for R8 #514

Merged
merged 37 commits into from
Feb 4, 2020
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
62e16b0
filter position and not require markdown
Jan 21, 2020
0085562
Fixing qnaSection.js
Jan 21, 2020
9c43ebf
Cross train with tests
Jan 22, 2020
ac823ef
Import support in .qna.
Jan 22, 2020
03282c5
Merge branch 'master' of https://github.com/microsoft/botframework-cl…
Jan 22, 2020
71c01a5
getting tests to pass
Jan 22, 2020
8bbd986
multi-turn support in qna file format
Jan 23, 2020
335990e
fixing multi-turn context-only
Jan 23, 2020
93136d6
Merge branch 'master' of https://github.com/microsoft/botframework-cl…
Jan 23, 2020
30660b2
Finalizing multiturn payload
Jan 23, 2020
17e8c49
Updating multi-turn support.
Jan 23, 2020
be21aca
support for ID in .qna
Jan 23, 2020
32ad038
Support for parsing qna source via app description
Jan 23, 2020
6abb8fc
Merge branch 'master' of https://github.com/microsoft/botframework-cl…
Jan 23, 2020
bdbad03
removing diskio from helper.
Jan 24, 2020
eda9d01
doc updates
Jan 24, 2020
1e0988f
FI from master
Jan 29, 2020
7870ba6
updates for multi-turn flat
Jan 29, 2020
0f58995
multi-turn in JsonToQnA
Jan 29, 2020
b176601
Fixing existing tests.
Jan 30, 2020
bcb1224
fixing tests
Jan 30, 2020
1444fdf
adding multi-turn tests
Jan 30, 2020
b757fbd
finalizing
Jan 30, 2020
9969f0c
more tests
Jan 30, 2020
fcf03c3
PR feedback.
Jan 30, 2020
104688c
import support
Jan 30, 2020
5a3a353
FI from master
Jan 30, 2020
7e17630
Finalizing LU changes
Feb 1, 2020
fc06097
Merge branch 'master' of https://github.com/microsoft/botframework-cl…
Feb 1, 2020
ff47cec
fixing tests
Feb 1, 2020
e4e8529
Merge branch 'master' into vishwac/lu-updates-r8
tsuwandy Feb 1, 2020
68e2d8b
Reverting changes to launch.json
Feb 3, 2020
f130ef3
Merge branch 'vishwac/lu-updates-r8' of https://github.com/microsoft/…
Feb 3, 2020
32427b8
Merge branch 'master' into vishwac/lu-updates-r8
vishwacsena Feb 4, 2020
b1badc6
Updates based on PR feedback and general cleanup
Feb 4, 2020
20d621f
Merge branch 'master' of https://github.com/microsoft/botframework-cl…
Feb 4, 2020
f10098a
MMerge branch 'vishwac/lu-updates-r8' of https://github.com/microsoft…
Feb 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/cli/test/fixtures/lastversioncheck/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@
"init": "../../../src/hooks/init/inithook"
}
},
"lastversioncheck": "2020-01-30T21:57:51.160Z"
"lastversioncheck": "2020-01-30T22:43:34.091Z"
}
5 changes: 5 additions & 0 deletions packages/lu/docs/lu-file-format.md
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,11 @@ Few different references are supported in the .lu file. These follow Markdown li
- `[link name](<.lu file path>#*utterances*)` will find all utterances in the .lu file and add them to the list of utterances where this reference is specified.
- `[link name](<.lu file path>#*patterns*)` will find all patterns in the .lu file and add them to the list of utterances where this reference is specified.
- `[link name](<.lu file path>#*utterancesAndPatterns*)` will find all utterances and patterns in the .lu file and add them to the list of utterances where this reference is specified.
- `[link name](<.qna file path>#$name?)` will find all alterations from the specific alteration definition named `name` in the .qna content and add them to the list of utterances where this reference is specified.
- `[link name](<.qna file path>#*alterations*?)` will find all alterations from the .qna content and add them to the list of utterances where this reference is specified.
- `[link name](<.qna file path>#?question-to-find?)` will find all question variations of the specified question and add them to the list of utterances where this reference is specified. Note: any spaces in your question need to be replaced with '-', and the reference must both start and end with '?'. E.g. `#?book-flight?`
- `[link name](<.qna file path>#*answers*?)` will find all answers in the .qna content and add them to the list of utterances where this reference is specified.



Here's an example of those references:
Expand Down
48 changes: 35 additions & 13 deletions packages/lu/src/parser/lu/luMerger.js
Original file line number Diff line number Diff line change
Expand Up @@ -286,27 +286,55 @@ const filterQuestionMarkRef = function(allParsedContent, parsedUtterance, srcFil
if (!parsedUtterance.path.endsWith('?')) {
return result
}

let parsedQnABlobs
let parsedQnABlobs, parsedQnAAlterations
if( parsedUtterance.fileName.endsWith('*')) {
// this notation is only valid with file path. So try as file path.
let tPath = parsedUtterance.fileName.replace(/\*/g, '');
parsedQnABlobs = (allParsedContent.QnAContent || []).filter(item => item.srcFile.includes(path.resolve(path.dirname(srcFile), tPath)));
parsedQnAAlterations = (allParsedContent.QnAAlterations || []).filter(item => item.srcFile.includes(path.resolve(path.dirname(srcFile), tPath)));
} else {
// look for QnA
parsedQnABlobs = []
parsedQnAAlterations = []
parsedQnABlobs.push((allParsedContent.QnAContent || []).find(item => item.srcFile == parsedUtterance.fileName || item.srcFile == path.resolve(path.dirname(srcFile), parsedUtterance.fileName)));
parsedQnAAlterations.push((allParsedContent.QnAAlterations || []).find(item => item.srcFile == parsedUtterance.fileName || item.srcFile == path.resolve(path.dirname(srcFile), parsedUtterance.fileName)));
}

if(!parsedQnABlobs || !parsedQnABlobs[0]) {
if(!parsedQnABlobs || !parsedQnABlobs[0] || !parsedQnAAlterations || !parsedQnAAlterations[0]) {
let error = BuildDiagnostic({
message: `Unable to parse ${utteranceText} in file: ${srcFile}. Cannot find reference.`
});

throw (new exception(retCode.errorCode.INVALID_INPUT, error.toString()));
}

parsedQnABlobs.forEach(blob => blob.qnaJsonStructure.qnaList.forEach(item => item.questions.forEach(question => result.utterances.push(question))));
if (parsedUtterance.path.toLowerCase().startsWith('*answers*')) {
parsedQnABlobs.forEach(blob => blob.qnaJsonStructure.qnaList.forEach(item => result.utterances.push(item.answer)));
} else if (parsedUtterance.path.length > 1 && parsedUtterance.path.startsWith('?') && parsedUtterance.path.endsWith('?')) {
let itemsFound = undefined;
let testQuestion = parsedUtterance.path.replace(/\?/g, '').replace(/-/g, ' ').trim();
// find the specific question
parsedQnABlobs.forEach(blob => {
if (itemsFound) return;
itemsFound = blob.qnaJsonStructure.qnaList.find(item => item.questions.includes(testQuestion));
})
if (itemsFound) {
itemsFound.questions.forEach(question => result.utterances.push(question));
}
} else if (parsedUtterance.path.toLowerCase().startsWith('*alterations*')) {
parsedQnAAlterations.forEach(blob => blob.qnaAlterations.wordAlterations.forEach(item => item.alterations.forEach(alter => result.utterances.push(alter))));
} else if (parsedUtterance.path.startsWith('$') && parsedUtterance.path.endsWith('?')) {
// specific alteration to find
let alterToFind = parsedUtterance.path.replace(/[$\?]/g, '').trim();
parsedQnAAlterations.forEach(blob => blob.qnaAlterations.wordAlterations.forEach(item => {
if (item.alterations.includes(alterToFind)) {
item.alterations.forEach(alter => result.utterances.push(alter));
}
}));
} else {
parsedQnABlobs.forEach(blob => blob.qnaJsonStructure.qnaList.forEach(item => item.questions.forEach(question => result.utterances.push(question))));
}

return result
}

Expand All @@ -329,13 +357,8 @@ const filterLuisContent = function(allParsedContent, parsedUtterance, srcFile, u
// get utterance list from reference intent and update list
let referenceIntent = parsedUtterance.path.toLowerCase().replace(/-/g, ' ').replace('*utterancesandpatterns*', '').trim();
if (referenceIntent === '') {
// get all utterances and add them
utterances = parsedLUISBlob.LUISJsonStructure.utterances;
// Find all patterns and add them
(parsedLUISBlob.LUISJsonStructure.patterns || []).forEach(item => {
let newUtterance = new hClasses.uttereances(item.pattern, item.intent);
if (utterances.find(match => deepEqual(newUtterance, match)) !== undefined) utterances.push(new hClasses.uttereances(item.pattern, item.intent))
});
patterns = parsedLUISBlob.LUISJsonStructure.patterns;
} else {
utterances = parsedLUISBlob.LUISJsonStructure.utterances.filter(item => item.intent == referenceIntent);
// find and add any patterns for this intent
Expand All @@ -354,8 +377,7 @@ const filterLuisContent = function(allParsedContent, parsedUtterance, srcFile, u
// get utterance list from reference intent and update list
let referenceIntent = parsedUtterance.path.toLowerCase().replace(/-/g, ' ').replace('*patterns*', '').trim();
if (referenceIntent === '') {
// Find all patterns and add them
(parsedLUISBlob.LUISJsonStructure.patterns || []).forEach(item => utterances.push(new hClasses.uttereances(item.pattern, item.intent)));
patterns = parsedLUISBlob.LUISJsonStructure.patterns;
} else {
// find and add any patterns for this intent
patterns = parsedLUISBlob.LUISJsonStructure.patterns.filter(item => item.intent == referenceIntent);
Expand Down
20 changes: 19 additions & 1 deletion packages/lu/src/parser/lufile/parseFileContents.js
Original file line number Diff line number Diff line change
Expand Up @@ -1806,7 +1806,25 @@ const parseAndHandleModelInfoSection = function (parsedContent, luResource, log)
}

if (kvPair[1].toLowerCase() === 'app') {
parsedContent.LUISJsonStructure[kvPair[2]] = kvPair[3];
if (kvPair[2].toLowerCase().startsWith('settings')) {
let settingsRegExp = /^settings.(?<property>.*?$)/gmi;
let settingsPair = settingsRegExp.exec(kvPair[2]);
if (settingsPair && settingsPair.groups && settingsPair.groups.property) {
if (!parsedContent.LUISJsonStructure.settings) {
parsedContent.LUISJsonStructure.settings = [{name : settingsPair.groups.property, value : kvPair[3] === "true"}];
} else {
// find the setting
let sFound = parsedContent.LUISJsonStructure.settings.find(setting => setting.name == settingsPair.groups.property);
if (sFound) {
sFound.value = kvPair[3] === "true";
} else {
parsedContent.LUISJsonStructure.settings.push({name : settingsPair.groups.property, value : kvPair[3] === "true"})
}
}
}
} else {
parsedContent.LUISJsonStructure[kvPair[2]] = kvPair[3];
}
} else if (kvPair[1].toLowerCase() === 'kb') {
parsedContent.qnaJsonStructure[kvPair[2]] = kvPair[3];
} else if (kvPair[1].toLowerCase() === 'intent') {
Expand Down
5 changes: 5 additions & 0 deletions packages/lu/src/parser/luis/luConverter.js
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ const addAppMetaData = function(LUISJSON) {
if (LUISJSON.versionId) fileContent += `> !# @app.versionId = ${LUISJSON.versionId}` + NEWLINE;
if (LUISJSON.culture) fileContent += `> !# @app.culture = ${LUISJSON.culture}` + NEWLINE;
if (LUISJSON.luis_schema_version) fileContent += `> !# @app.luis_schema_version = ${LUISJSON.luis_schema_version}` + NEWLINE;
if (LUISJSON.settings) {
LUISJSON.settings.forEach(setting => {
fileContent += `> !# @app.settings.${setting.name} = ${setting.value}` + NEWLINE;
})
}
return fileContent === '' ? fileContent : `> LUIS application information` + NEWLINE + fileContent + NEWLINE + NEWLINE;
}
/**
Expand Down
136 changes: 135 additions & 1 deletion packages/lu/src/parser/luis/luisValidator.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ const retCode = require('./../utils/enums/CLI-errors')
const helpers = require('./../utils/helpers')
const exception = require('./../utils/exception')
const BuildDiagnostic = require('./../lufile/diagnostic').BuildDiagnostic;

const validateLUIS = function(luisJSON) {
// look for entity name collisions - list, simple, patternAny, phraselist
// look for list entities labelled
Expand All @@ -28,9 +27,144 @@ const validateLUIS = function(luisJSON) {
validateUtterances(luisJSON, entitiesList)
// validate composite entities
validateComposites(luisJSON, entitiesList)
// do boundary validation
validateBoundaries(luisJSON);
return true;
}
/**
 * Checks a parsed LUIS application against the service limits held in
 * retCode.boundaryLimits. The checks run in a fixed order and the first limit
 * that is exceeded is reported through validationError with the matching
 * retCode.errorCode, which aborts validation.
 * Boundaries documented here - https://docs.microsoft.com/en-us/azure/cognitive-services/luis/luis-boundaries
 *
 * Collections that are missing from the model, and entities without a `roles`
 * array, are treated as empty rather than raising a TypeError.
 *
 * @param {Object} luisJSON LUIS application JSON to validate
 * @returns {void}
 * @throws via validationError when any boundary is exceeded
 */
const validateBoundaries = function(luisJSON) {
    const limits = retCode.boundaryLimits;
    const errors = retCode.errorCode;

    // A section that is absent from the model counts as an empty section.
    const collection = name => luisJSON[name] || [];
    const rolesOf = item => item.roles || [];
    // `words` is a comma separated string; every comma separated token counts as one phrase.
    const countPhrases = lists => lists.reduce((total, item) => total + item.words.split(',').length, 0);
    const entityScopes = ["prebuiltEntities", "patternAnyEntities", "regex_entities", "closedLists", "composites", "entities"];

    const intents = collection('intents');
    const utterances = collection('utterances');
    const patternAnyEntities = collection('patternAnyEntities');
    const patterns = collection('patterns');
    const regexEntities = collection('regex_entities');

    // intents: 500 per application: 499 custom intents, and the required None intent.
    if (intents.length > limits.MAX_NUM_INTENTS) {
        validationError(errors.BOUNDARY_INTENTS, `${intents.length} intents found in application. At most ${limits.MAX_NUM_INTENTS} is allowed.`);
    }

    // utterances: 15,000 per application - there is no limit on the number of utterances per intent
    if (utterances.length > limits.MAX_NUM_UTTERANCES) {
        validationError(errors.BOUNDARY_UTTERANCES, `${utterances.length} utterances found in application. At most ${limits.MAX_NUM_UTTERANCES} is allowed.`);
    }

    // pattern.any entities - 100 per application
    if (patternAnyEntities.length > limits.MAX_NUM_PATTERNANY_ENTITIES) {
        validationError(errors.BOUNDARY_PATTERNANYENTITY, `${patternAnyEntities.length} pattern.any entities found in application. At most ${limits.MAX_NUM_PATTERNANY_ENTITIES} is allowed.`);
    }

    // Utterances - 500 characters.
    utterances.forEach(utt => {
        if (utt.text.length > limits.MAX_CHAR_IN_UTTERANCE) {
            validationError(errors.BOUNDARY_UTTERANCE_CHAR_LENGTH, `utterance '${utt.text}' under intent '${utt.intent}' has ${utt.text.length} characters. At most ${limits.MAX_CHAR_IN_UTTERANCE} is allowed.`);
        }
    });

    // patterns - 500 patterns per application.
    if (patterns.length > limits.MAX_NUM_PATTERNS) {
        validationError(errors.BOUNDARY_PATTERNS, `${patterns.length} patterns found in application. At most ${limits.MAX_NUM_PATTERNS} is allowed.`);
    }

    // patterns - Maximum length of pattern is 400 characters.
    patterns.forEach(utt => {
        if (utt.pattern.length > limits.MAX_CHAR_IN_PATTERNS) {
            validationError(errors.BOUNDARY_PATTERN_CHAR_LIMIT, `Pattern '${utt.pattern}' under intent '${utt.intent}' has ${utt.pattern.length} characters. At most ${limits.MAX_CHAR_IN_PATTERNS} characters are allowed in any pattern.`);
        }
    });

    // regex entity - 20 entities.
    if (regexEntities.length > limits.MAX_NUM_REGEX_ENTITIES) {
        validationError(errors.BOUNDARY_REGEX_ENTITY, `${regexEntities.length} regex entities found in application. At most ${limits.MAX_NUM_REGEX_ENTITIES} is allowed.`);
    }

    // regex entity - 500 character max. per regular expression entity pattern
    regexEntities.forEach(utt => {
        if (utt.regexPattern.length > limits.MAX_CHAR_REGEX_ENTITY_PATTERN) {
            validationError(errors.BOUNDARY_REGEX_CHAR_LIMIT, `Regex entity '${utt.name}' with pattern /${utt.regexPattern}/ has ${utt.regexPattern.length} characters. At most ${limits.MAX_CHAR_REGEX_ENTITY_PATTERN} is allowed.`);
        }
    });

    // list entities: max 20000 synonyms.
    collection('closedLists').forEach(listEntity => {
        (listEntity.subLists || []).forEach(subList => {
            const synonyms = subList.list || [];
            if (synonyms.length > limits.MAX_LIST_ENTITY_SYNONYMS) {
                validationError(errors.BOUNDARY_SYNONYMS_LENGTH, `'${listEntity.name}' list entity for parent (normalized value) '${subList.canonicalForm}' has ${synonyms.length} synonyms. At most ${limits.MAX_LIST_ENTITY_SYNONYMS} is allowed.`);
            }
        });
    });

    // Phrase lists are read from `model_features` when present, otherwise from `phraseLists`.
    const phraseLists = luisJSON.model_features || luisJSON.phraseLists || [];
    // phrase list - 500 phrase lists.
    if (phraseLists.length > limits.MAX_NUM_PHRASE_LISTS) {
        validationError(errors.BOUNDARY_PHRASE_LIST_LIMIT, `${phraseLists.length} phrase lists found in application. At most ${limits.MAX_NUM_PHRASE_LISTS} is allowed.`);
    }

    // phrase list - Maximum number of total phrases per application of 500,000 phrases.
    const totalPhrases = countPhrases(phraseLists);
    if (totalPhrases > limits.MAX_NUM_PHRASES_IN_ALL_PHRASE_LIST) {
        validationError(errors.BOUNDARY_TOTAL_PHRASES, `${totalPhrases} phrases found across all phrase list definitions. At most ${limits.MAX_NUM_PHRASES_IN_ALL_PHRASE_LIST} is allowed.`);
    }

    // phrase list - Interchangeable Phraselist has max of 50,000 phrases.
    // A truthy `mode` marks a phrase list as interchangeable.
    const interchangeablePhrases = countPhrases(phraseLists.filter(item => item.mode));
    if (interchangeablePhrases > limits.MAX_INTERCHANGEABLE_PHRASES) {
        validationError(errors.BOUNDARY_INTC_PHRASES_LIMIT, `${interchangeablePhrases} phrases found across all interchangeable phrase list definitions. At most ${limits.MAX_INTERCHANGEABLE_PHRASES} is allowed.`);
    }

    // phrase list - Non-interchangeable phraselist has max of 5,000 phrases.
    const nonInterchangeablePhrases = countPhrases(phraseLists.filter(item => !item.mode));
    if (nonInterchangeablePhrases > limits.MAX_NON_INTERCHANGEABLE_PHRASES) {
        validationError(errors.BOUNDARY_NINTC_PHRASES_LIMIT, `${nonInterchangeablePhrases} phrases found across all non-interchangeable phrase list definitions. At most ${limits.MAX_NON_INTERCHANGEABLE_PHRASES} is allowed.`);
    }

    // Roles - 10 roles per entity
    // NOTE: the label in the message is the scope name minus its last character,
    // so "entities" is reported as "entitie". Kept as-is to leave message text unchanged.
    let totalRoles = 0;
    entityScopes.forEach(scope => {
        collection(scope).forEach(item => {
            const roles = rolesOf(item);
            if (roles.length > limits.MAX_ROLES_PER_ENTITY) {
                validationError(errors.BOUNDARY_ROLES_PER_ENTITY, `${scope.substring(0, scope.length - 1)} ${item.name} has ${roles.length} roles. At most ${limits.MAX_ROLES_PER_ENTITY} is allowed.`);
            }
            totalRoles += roles.length;
        });
    });

    // Roles - 300 roles per application
    if (totalRoles > limits.MAX_NUM_ROLES) {
        validationError(errors.BOUNDARY_TOTAL_ROLES, `${totalRoles} role definitions found across all entity types. At most ${limits.MAX_NUM_ROLES} is allowed.`);
    }

    // features - Maximum number of models that can be used as a descriptor (feature) to a specific model to be 10 models.
    ["intents", "entities"].forEach(scope => {
        collection(scope).forEach(item => {
            if (item.features && item.features.length > limits.MAX_NUM_DESCRIPTORS_PER_MODEL) {
                validationError(errors.BOUNDARY_FEATURE_PER_MODEL, `${scope.substring(0, scope.length - 1)} ${item.name} has ${item.features.length} descriptors (feature). At most ${limits.MAX_NUM_DESCRIPTORS_PER_MODEL} is allowed.`);
            }
        });
    });

    // ml entities + roles - A limit of either 100 parent entities or 330 entities, whichever limit the user hits first.
    // A role counts as an entity for the purpose of this boundary. An example is a composite with a simple entity,
    // which has 2 roles is: 1 composite + 1 simple + 2 roles = 4 of the 330 entities.
    const numberOfParentEntities = collection('entities')
        .filter(item => item.children && item.children.length > 0)
        .length;
    const totalNumberOfEntitiesAndRoles = entityScopes
        .reduce((sum, scope) => sum + collection(scope).length, 0) + totalRoles;

    if (numberOfParentEntities > limits.MAX_NUM_PARENT_ENTITIES) {
        validationError(errors.BOUNDARY_PARENT_ENTITY_LIMIT, `${numberOfParentEntities} parent ml entities found in application. At most ${limits.MAX_NUM_PARENT_ENTITIES} is allowed.`);
    }

    if (totalNumberOfEntitiesAndRoles > limits.MAX_TOTAL_ENTITES_AND_ROLES) {
        validationError(errors.BOUNDARY_TOTAL_ENTITIES_AND_ROLES, `${totalNumberOfEntitiesAndRoles} combined roles and entity definitions found. At most ${limits.MAX_TOTAL_ENTITES_AND_ROLES} is allowed.`);
    }
}
/**
 * Raises a validation failure.
 * The message is turned into a diagnostic via BuildDiagnostic, and an exception is
 * thrown carrying the error code, the diagnostic's string form and the diagnostic itself.
 * @param {*} code error code handed to the exception
 * @param {string} errorMsg message used to build the diagnostic
 * @throws {exception} always
 */
const validationError = function (code, errorMsg) {
    const diagnostic = BuildDiagnostic({ message: errorMsg });
    const failure = new exception(code, diagnostic.toString(), [diagnostic]);
    throw failure;
}
const addEntities = function(luisJSON, entitiesList){
luisJSON.entities.forEach(function (entity) {
entitiesList.push(new validateLUISBlobEntity(entity.name, ['simple'], entity.roles));
Expand Down
Loading