Skip to content

Commit

Permalink
support section and cross-train in bf lu (#354)
Browse files Browse the repository at this point in the history
* add section support

* fix bug

* fix unit tests

* add tests for section enabled and mergeIntents disabled

* add CRUD for section and intent

* fix typo

* refactor lu parser component to sections

* optimize

* optimize g4 parser

* fix some bugs

* fix nestintents bug

* init section CRUD

* add section check step

* fix bugs based on failed tests

* fix some tests and typo bugs

* fix tests

* fix luis_sorted.lu

* revert unnecessary changes in test files

* optimize and add tests

* revert section api in this pr

* remove uuid from section

* optimize logic and add tests

* revert test file changes

* add more tests

* add test for enableSections set to false

* add more test

* export parse content of section name line when it is treated as intent

* refine section crud

* Update sectionOperator.js

fix function error

* extract isSectionEnabled in parser

* add more tests

* fix test failures

* expose parseLuList function

* add interruption intent converter function

* commit covert.ts changes

* Revert "commit covert.ts changes"

This reverts commit 8bd5abe.

* add crossTrain command

* add crossTrain function

* optimize crossTrain

* optimize name

* support recursively converting files in sub folders

* add entity support

* support entity

* optimize tests

* optimize cross train

* optimize tests

* fix test cases

* add locale support

* support local intent

* support empty lu file content

* try to merge master 2

* move newly added tests in luis folder to lu folder

* fix conflicts

* fix tests

* fix style

* fix style issues

* add docs

* optimize style

* remove cross-train cli

* add test for getConfigFile function in fileHelper

* optimize code

* optimize test

* fix style warning

* revert remove cli commit

* fix getLuFiles function to accept multiple files

* exclude generated files in test coverage

* ignore generated files in test coverage

* remove dup config in lu's package.json

* refine code

* refine code

* fix style validation error

* support nestedIntentSection

* support nestedIntentSection

* remove cross train doc

* remove more cross train doc

* optimize tests

* optimize luCrossTrainer structure

* optimize tests

* fix bug

* remove entities from interuption and labels from utterances and pull patterns

* make config format from .config to .json and refine crossTrain function interface

* optimize config structure and add multiple dialog invocations support in same trigger

* optimize code structure. One root one core training

* remove only in test file

* fix style error

* expose diagnostics class

* throw diagnostic in luis validation

* optimize code based on comments

* remove cross-train cli related changes and only keep the api
  • Loading branch information
feich-ms committed Nov 27, 2019
1 parent cda90ea commit 13f2fce
Show file tree
Hide file tree
Showing 52 changed files with 4,499 additions and 1,658 deletions.
2 changes: 1 addition & 1 deletion packages/lu/src/parser/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
const modules = {
parser: {
parseFile: require('./lufile/parseFileContents').parseFile,
validateLUISBlob: require('./luis/luisValidator').validateLUIS
validateLUISBlob: require('./luis/luisValidator')
},
refresh: {
constructMdFromLUIS: require('./luis/luConverter'),
Expand Down
266 changes: 266 additions & 0 deletions packages/lu/src/parser/lu/luCrossTrainer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
const retCode = require('./../utils/enums/CLI-errors')
const helpers = require('./../utils/helpers')
const exception = require('./../utils/exception')
const luParser = require('./../lufile/luParser');
const SectionOperator = require('./../lufile/sectionOperator');
const LUSectionTypes = require('./../utils/enums/lusectiontypes');
const DiagnosticSeverity = require('./../lufile/diagnostic').DiagnosticSeverity;
const NEWLINE = require('os').EOL;

module.exports = {
/**
* Do cross training among lu files
* @param {luObject[]} luObjectArray the luObject list to be parsed
* @param {any} crossTrainConfig cross train json config
* @returns {Map<string, LUResource>} Map of file id and luResource
* @throws {exception} Throws on errors. exception object includes errCode and text
*/
luCrossTrain: async function (luObjectArray, crossTrainConfig) {
try {
const crossTrainConfigObj = JSON.parse(crossTrainConfig);
const rootObjectIds = crossTrainConfigObj.rootIds;
const triggerRules = crossTrainConfigObj.triggerRules;
const intentName = crossTrainConfigObj.intentName;
const verbose = crossTrainConfigObj.verbose;

// parse lu content to LUResource object
let fileIdToLuResourceMap = this.parseAndValidateLuContent(luObjectArray, verbose);

// contruct resource tree to build the father-children relationship among lu files
let resources = this.constructResoureTree(fileIdToLuResourceMap, triggerRules);

// do cross training from roots. One root one core training
for (const rootObjectId of rootObjectIds) {
if (resources.some(r => r.id === rootObjectId)) {
// do cross training for each root at top level
const result = this.crossTrain(rootObjectId, resources, intentName);
for (const res of result) {
fileIdToLuResourceMap.set(res.id, res.content);
}
} else {
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, root lu file '${rootObjectId}' does not exist`));
}
}

return fileIdToLuResourceMap;
} catch (err) {
throw (err)
}
},

/**
* Parse and validate luObject array to convert to LUResource object dict
* @param {luObject[]} luObjectArray the luObject list to be parsed
* @param {boolean} verbose indicate to enable log messages or not
* @returns {Map<string, LUResource>} Map of file id and luResource
* @throws {exception} Throws on errors. exception object includes errCode and text
*/
parseAndValidateLuContent: function (luObjectArray, verbose) {
let fileIdToLuResourceMap = new Map();
for (const luObject of luObjectArray) {
let luContent = luObject.content;
luContent = helpers.sanitizeNewLines(luContent);
if (luContent === undefined || luContent === '') continue;

let luResource = luParser.parse(luContent);
if (luResource.Errors && luResource.Errors.length > 0) {
if (verbose) {
var warns = luResource.Errors.filter(error => (error && error.Severity && error.Severity === DiagnosticSeverity.WARN));
if (warns.length > 0) {
process.stdout.write(warns.map(warn => warn.toString()).join(NEWLINE).concat(NEWLINE));
}
}

var errors = luResource.Errors.filter(error => (error && error.Severity && error.Severity === DiagnosticSeverity.ERROR));
if (errors.length > 0) {
throw (new exception(retCode.errorCode.INVALID_LINE, errors.map(error => error.toString()).join(NEWLINE)));
}
}

fileIdToLuResourceMap.set(luObject.id, luResource);
}

return fileIdToLuResourceMap;
},

/**
* Contruct resource tree to build the father-children relationship among lu files
* @param {Map<string, LUResource>} fileIdToLuResourceMap Map of file id and luResource
* @param {any} triggerRules trigger rules object that indicate the triggering rules from root to dest lu files
* @returns {any[]} Object array of LUResource with id and children properties
* @throws {exception} Throws on errors. exception object includes errCode and text
*/
constructResoureTree(fileIdToLuResourceMap, triggerRules) {
let visitedChildren = new Set();
let resources = [];
let fileIdsFromInput = Array.from(fileIdToLuResourceMap.keys());
for (const fileId of fileIdsFromInput) {
let luResource = fileIdToLuResourceMap.get(fileId);
let resource = {
id: fileId,
content: luResource,
children: []
};

if (!(fileId in triggerRules)) {
resources.push(resource);
continue;
}

let intents = [];
for (const section of luResource.Sections) {
if (section.SectionType === LUSectionTypes.SIMPLEINTENTSECTION
|| section.SectionType === LUSectionTypes.NESTEDINTENTSECTION) {
intents.push(section);
}
}

const destLuFileToIntent = triggerRules[fileId];
for (const destLuFile of Object.keys(destLuFileToIntent)) {
if (!fileIdsFromInput.includes(destLuFile)) continue;

if (visitedChildren.has(destLuFile)) {
// validate loop in a tree or forest
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, dialog call loop detected for lu file ${destLuFile} when doing cross training`));
}

const triggerIntentName = destLuFileToIntent[destLuFile];
if (!intents.some(i => i.Name === triggerIntentName)) {
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, trigger intent '${triggerIntentName}' is not found in lu file: ${fileId}`));
}

resource.children.push({
target: destLuFile,
intent: triggerIntentName
});

visitedChildren.add(destLuFile);
}

resources.push(resource);
}

return resources;
},

/**
* Cross training core function. Do cross training from a root to its children once.
* @param {string} rootResourceId the root resource object id
* @param {any[]} resources all resource object list
* @param {string} intentName interuption intent name
* @returns {any[]} updated resource objects
* @throws {exception} Throws on errors. exception object includes errCode and text
*/
crossTrain: function (rootResourceId, resources, intentName) {
const idToResourceMap = new Map();
for (const resource of resources) {
idToResourceMap.set(resource.id, resource);
}

// Parse resources
let rootResource = resources.filter(r => r.id === rootResourceId)[0];
rootResource.visited = true;
this.mergeRootInteruptionToLeaves(rootResource, idToResourceMap, intentName);

return Array.from(idToResourceMap.values());
},

mergeRootInteruptionToLeaves: function (rootResource, result, intentName) {
if (rootResource.children === undefined || rootResource.length <= 0) return;

this.mergeBrothersInteruption(rootResource, result, intentName)
for (const child of rootResource.children) {
let childResource = result.get(child.target);
if (childResource.visited === undefined) {
const newChildResource = this.mergeFatherInteruptionToChild(rootResource, childResource, intentName);
result.set(child.target, newChildResource);
newChildResource.visited = true;
this.mergeRootInteruptionToLeaves(newChildResource, result, intentName);
}
}
},

mergeBrothersInteruption: function (resource, result, intentName) {
let children = resource.children;
for (const child of children) {
let triggerIntent = child.intent;
const brotherSections = resource.content.Sections.filter(s => s.Name !== triggerIntent
&& s.Name !== intentName
&& (s.SectionType === LUSectionTypes.SIMPLEINTENTSECTION || s.SectionType === LUSectionTypes.NESTEDINTENTSECTION));

let brotherUtterances = [];
brotherSections.forEach(s => {
if (s.SectionType === LUSectionTypes.SIMPLEINTENTSECTION) {
brotherUtterances = brotherUtterances.concat(s.UtteranceAndEntitiesMap.map(u => u.utterance));
} else {
s.SimpleIntentSections.forEach(section => {
brotherUtterances = brotherUtterances.concat(section.UtteranceAndEntitiesMap.map(u => u.utterance));
})
}
});

let targetResource = result.get(child.target);

// Merge direct brother's utterances
targetResource = this.mergeInteruptionIntent(brotherUtterances, targetResource, intentName);
result.set(targetResource.id, targetResource);
}
},

mergeFatherInteruptionToChild: function (fatherResource, childResource, intentName) {
const fatherInteruptions = fatherResource.content.Sections.filter(s => s.Name === intentName);
if (fatherInteruptions && fatherInteruptions.length > 0) {
const fatherInteruption = fatherInteruptions[0];
const fatherUtterances = fatherInteruption.UtteranceAndEntitiesMap.map(u => u.utterance);
childResource = this.mergeInteruptionIntent(fatherUtterances, childResource, intentName);
}

return childResource;
},

mergeInteruptionIntent: function (fromUtterances, toResource, intentName) {
const toInteruptions = toResource.content.Sections.filter(section => section.Name === intentName);
if (toInteruptions && toInteruptions.length > 0) {
const toInteruption = toInteruptions[0];
const existingUtterances = toInteruption.UtteranceAndEntitiesMap.map(u => u.utterance);
// construct new content here
let newFileContent = '';
fromUtterances.forEach(utterance => {
if (!existingUtterances.includes(utterance)) {
newFileContent += '- ' + utterance + NEWLINE;
}
});

if (newFileContent === '') return toResource;

newFileContent = toInteruption.ParseTree.intentDefinition().getText().trim() + NEWLINE + newFileContent;
let lines = newFileContent.split(/\r?\n/);
let newLines = [];
lines.forEach(line => {
if (line.trim().startsWith('-')) {
newLines.push('- ' + line.trim().slice(1).trim());
} else if (line.trim().startsWith('##')) {
newLines.push('## ' + line.trim().slice(2).trim());
} else if (line.trim().startsWith('#')) {
newLines.push('# ' + line.trim().slice(1).trim());
}
})

newFileContent = newLines.join(NEWLINE);

// update section here
toResource.content = new SectionOperator(toResource.content).updateSection(toInteruption.Id, newFileContent);
} else {
// construct new content here
if (fromUtterances && fromUtterances.length > 0) {
let newFileContent = NEWLINE + `# ${intentName}` + NEWLINE;
fromUtterances.forEach(utterance => newFileContent += '- ' + utterance + NEWLINE);

// add section here
toResource.content = new SectionOperator(toResource.content).addSection(newFileContent);
}
}

return toResource;
}
}
8 changes: 6 additions & 2 deletions packages/lu/src/parser/lufile/LUFileLexer.g4
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
lexer grammar LUFileLexer;

@lexer::members {
this.ignoreWS = true; // usually we ignore whitespace, but inside template, whitespace is significant
this.ignoreWS = true; // usually we ignore whitespace, but inside utterance, whitespace is significant
}

fragment LETTER: 'a'..'z' | 'A'..'Z';
Expand Down Expand Up @@ -34,7 +34,7 @@ QNA
;

HASH
: '#'+ {this.ignoreWS = true;} -> pushMode(INTENT_NAME_MODE)
: '#' {this.ignoreWS = true;} -> pushMode(INTENT_NAME_MODE)
;

DASH
Expand Down Expand Up @@ -133,6 +133,10 @@ WS_IN_NAME
: WHITESPACE+ -> type(WS)
;

HASH_IN_NAME
: '#' -> type(HASH)
;

NEWLINE_IN_NAME
: '\r'? '\n' -> type(NEWLINE), popMode
;
Expand Down
Loading

0 comments on commit 13f2fce

Please sign in to comment.