Skip to content

Commit

Permalink
Add language support for the QnA Maker build-related API (#1083)
Browse files Browse the repository at this point in the history
* support output to file for kb:export command

* add language support for qna builder

* add more locale-to-language mappings

* add more test cases and validation for locale or language support

* fix typo
  • Loading branch information
feich-ms committed Feb 8, 2021
1 parent 17c7565 commit b300d92
Show file tree
Hide file tree
Showing 9 changed files with 397 additions and 22 deletions.
2 changes: 1 addition & 1 deletion packages/lu/src/parser/cross-train/crossTrainer.js
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, i
try {
for (const qnaObjectId of Array.from(qnaFileIdToResourceMap.keys())) {
let fileName = path.basename(qnaObjectId, path.extname(qnaObjectId))
const culture = fileHelper.getCultureFromPath(qnaObjectId)
const culture = fileHelper.getQnACultureFromPath(qnaObjectId)
fileName = culture ? fileName.substring(0, fileName.length - culture.length - 1) : fileName

const luObjectId = Array.from(luFileIdToResourceMap.keys()).find(x => x.toLowerCase() === qnaObjectId.toLowerCase())
Expand Down
2 changes: 1 addition & 1 deletion packages/lu/src/parser/lubuild/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ export class Builder {
let fileCulture: string
let fileName: string

let cultureFromPath = fileHelper.getCultureFromPath(file)
let cultureFromPath = fileHelper.getLuisCultureFromPath(file)
if (cultureFromPath) {
fileCulture = cultureFromPath
let fileNameWithCulture = path.basename(file, path.extname(file))
Expand Down
16 changes: 15 additions & 1 deletion packages/lu/src/parser/qnabuild/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const Content = require('./../lu/qna')
const KB = require('./../qna/qnamaker/kb')
const recognizerType = require('./../utils/enums/recognizertypes')
const qnaOptions = require('./../lu/qnaOptions')
const localeToQnALanguageMap = require('./../utils/enums/localeToQnALanguageMap')

export class Builder {
private readonly handler: (input: string) => any
Expand All @@ -38,7 +39,7 @@ export class Builder {
for (const file of files) {
let fileCulture: string
let fileName: string
let cultureFromPath = fileHelper.getCultureFromPath(file)
let cultureFromPath = fileHelper.getQnACultureFromPath(file)
if (cultureFromPath) {
fileCulture = cultureFromPath
let fileNameWithCulture = path.basename(file, path.extname(file))
Expand All @@ -48,6 +49,10 @@ export class Builder {
fileName = path.basename(file, path.extname(file))
}

if (!fileCulture) {
throw (new exception(retCode.errorCode.INVALID_INPUT_FILE, 'Culture is not set or unsupported by qnamaker service.'))
}

let fileContent = ''

let qnaFiles = await fileHelper.getLuObjects(undefined, file, true, fileExtEnum.QnAFile)
Expand Down Expand Up @@ -209,7 +214,15 @@ export class Builder {
// set kb name
if (!currentQna.kb.name) currentQna.kb.name = `${botName}(${suffix}).${qnamakerContent.language}.qna`

// set kb locale and map it to language that qna service can recognize
let locale = qnamakerContent.language
let language = localeToQnALanguageMap[locale]
if (!language) {
throw new Error(`${locale} is not supported in current qnamaker service.`)
}

let currentKB = currentQna.kb
currentKB.language = language
let currentAlt = currentQna.alterations
let hostName = ''
let kbId = ''
Expand Down Expand Up @@ -446,6 +459,7 @@ export class Builder {
await delay(delayDuration)
const emptyKBJson = {
name: currentKB.name,
language: currentKB.language,
qnaList: [],
urls: [],
files: []
Expand Down
295 changes: 295 additions & 0 deletions packages/lu/src/parser/utils/enums/localeToQnALanguageMap.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
/**
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
*/
/**
 * Lookup table from a locale code to the language name used for a QnA
 * knowledge base.
 *
 * Keys are locale codes: either a bare language code ('en'), a
 * language-region code ('en-us'), a language-script code ('zh-hans'), or a
 * language-script-region code ('sr-cyrl-rs'). Values are language names such
 * as 'English' or 'Chinese_Simplified'; many locales share the same value.
 *
 * Every key in this table is lowercase. A plain property lookup is
 * case-sensitive, so a locale such as 'en-US' will not match as written.
 * NOTE(review): presumably callers lowercase the locale before indexing into
 * this table — confirm against the call sites.
 *
 * A locale that is absent from this table yields `undefined` on lookup;
 * callers are expected to treat that as "unsupported language".
 */
module.exports = {
'ar': 'Arabic',
'ar-dz': 'Arabic',
'ar-bh': 'Arabic',
'ar-eg': 'Arabic',
'ar-iq': 'Arabic',
'ar-jo': 'Arabic',
'ar-kw': 'Arabic',
'ar-lb': 'Arabic',
'ar-ly': 'Arabic',
'ar-ma': 'Arabic',
'ar-om': 'Arabic',
'ar-qa': 'Arabic',
'ar-sa': 'Arabic',
'ar-sy': 'Arabic',
'ar-tn': 'Arabic',
'ar-ae': 'Arabic',
'ar-ye': 'Arabic',
'hy': 'Armenian',
'hy-am': 'Armenian',
'bn': 'Bangla',
'bn-bd': 'Bangla',
'bn-in': 'Bangla',
'eu': 'Basque',
'eu-es': 'Basque',
'bg': 'Bulgarian',
'bg-bg': 'Bulgarian',
'ca': 'Catalan',
'ca-es': 'Catalan',
// Chinese is split by script: a bare 'zh' (no script or region) falls into
// the Simplified group, while 'zh-hant' and the hk/mo/tw regions map to
// Traditional.
'zh': 'Chinese_Simplified',
'zh-hans': 'Chinese_Simplified',
'zh-cn': 'Chinese_Simplified',
'zh-sg': 'Chinese_Simplified',
'zh-hant': 'Chinese_Traditional',
'zh-hk': 'Chinese_Traditional',
'zh-mo': 'Chinese_Traditional',
'zh-tw': 'Chinese_Traditional',
'hr': 'Croatian',
'hr-ba': 'Croatian',
'hr-hr': 'Croatian',
'cs': 'Czech',
'cs-cz': 'Czech',
'da': 'Danish',
'da-dk': 'Danish',
'nl': 'Dutch',
'nl-be': 'Dutch',
'nl-nl': 'Dutch',
// English: besides two-letter region codes, this group also contains the
// numeric region codes 'en-029', 'en-150' and 'en-001'.
'en': 'English',
'en-as': 'English',
'en-ai': 'English',
'en-ag': 'English',
'en-au': 'English',
'en-at': 'English',
'en-bs': 'English',
'en-bb': 'English',
'en-be': 'English',
'en-bz': 'English',
'en-bm': 'English',
'en-bw': 'English',
'en-io': 'English',
'en-vg': 'English',
'en-bi': 'English',
'en-cm': 'English',
'en-ca': 'English',
'en-029': 'English',
'en-ky': 'English',
'en-cx': 'English',
'en-cc': 'English',
'en-ck': 'English',
'en-cy': 'English',
'en-dk': 'English',
'en-dm': 'English',
'en-er': 'English',
'en-150': 'English',
'en-fk': 'English',
'en-fj': 'English',
'en-fi': 'English',
'en-gm': 'English',
'en-de': 'English',
'en-gh': 'English',
'en-gi': 'English',
'en-gd': 'English',
'en-gu': 'English',
'en-gg': 'English',
'en-gy': 'English',
'en-hk': 'English',
'en-in': 'English',
'en-id': 'English',
'en-ie': 'English',
'en-im': 'English',
'en-il': 'English',
'en-jm': 'English',
'en-je': 'English',
'en-ke': 'English',
'en-ki': 'English',
'en-ls': 'English',
'en-lr': 'English',
'en-mo': 'English',
'en-mg': 'English',
'en-mw': 'English',
'en-my': 'English',
'en-mt': 'English',
'en-mh': 'English',
'en-mu': 'English',
'en-fm': 'English',
'en-ms': 'English',
'en-na': 'English',
'en-nr': 'English',
'en-nl': 'English',
'en-nz': 'English',
'en-ng': 'English',
'en-nu': 'English',
'en-nf': 'English',
'en-mp': 'English',
'en-pk': 'English',
'en-pw': 'English',
'en-pg': 'English',
'en-ph': 'English',
'en-pn': 'English',
'en-pr': 'English',
'en-rw': 'English',
'en-kn': 'English',
'en-lc': 'English',
'en-vc': 'English',
'en-ws': 'English',
'en-sc': 'English',
'en-sl': 'English',
'en-sg': 'English',
'en-sx': 'English',
'en-si': 'English',
'en-sb': 'English',
'en-za': 'English',
'en-ss': 'English',
'en-sh': 'English',
'en-sd': 'English',
'en-sz': 'English',
'en-se': 'English',
'en-ch': 'English',
'en-tz': 'English',
'en-tk': 'English',
'en-to': 'English',
'en-tt': 'English',
'en-tc': 'English',
'en-tv': 'English',
'en-um': 'English',
'en-vi': 'English',
'en-ug': 'English',
'en-gb': 'English',
'en-us': 'English',
'en-vu': 'English',
'en-001': 'English',
'en-zm': 'English',
'en-zw': 'English',
'et': 'Estonian',
'et-ee': 'Estonian',
'fi': 'Finnish',
'fi-fi': 'Finnish',
'fr': 'French',
'fr-be': 'French',
'fr-cm': 'French',
'fr-ca': 'French',
'fr-029': 'French',
'fr-ci': 'French',
'fr-fr': 'French',
'fr-ht': 'French',
'fr-lu': 'French',
'fr-ml': 'French',
'fr-mc': 'French',
'fr-ma': 'French',
'fr-re': 'French',
'fr-sn': 'French',
'fr-ch': 'French',
'fr-cd': 'French',
'gl': 'Galician',
'gl-es': 'Galician',
'de': 'German',
'de-at': 'German',
'de-de': 'German',
'de-li': 'German',
'de-lu': 'German',
'de-ch': 'German',
'el': 'Greek',
'el-gr': 'Greek',
'gu': 'Gujarati',
'gu-in': 'Gujarati',
'he': 'Hebrew',
'he-il': 'Hebrew',
'hi': 'Hindi',
'hi-in': 'Hindi',
'hu': 'Hungarian',
'hu-hu': 'Hungarian',
'is': 'Icelandic',
'is-is': 'Icelandic',
'id': 'Indonesian',
'id-id': 'Indonesian',
'ga': 'Irish',
'ga-ie': 'Irish',
'it': 'Italian',
'it-it': 'Italian',
'it-ch': 'Italian',
'ja': 'Japanese',
'ja-jp': 'Japanese',
'kn': 'Kannada',
'kn-in': 'Kannada',
'ko': 'Korean',
'ko-kr': 'Korean',
'lv': 'Latvian',
'lv-lv': 'Latvian',
'lt': 'Lithuanian',
'lt-lt': 'Lithuanian',
'ml': 'Malayalam',
'ml-in': 'Malayalam',
'ms': 'Malay',
'ms-bn': 'Malay',
'ms-my': 'Malay',
// 'no', 'nb' and 'nn' (and their '-no' region forms) all collapse to the
// single value 'Norwegian'.
'no': 'Norwegian',
'nb': 'Norwegian',
'nb-no': 'Norwegian',
'nn': 'Norwegian',
'nn-no': 'Norwegian',
'pl': 'Polish',
'pl-pl': 'Polish',
'pt': 'Portuguese',
'pt-br': 'Portuguese',
'pt-pt': 'Portuguese',
'pa': 'Punjabi',
'pa-arab': 'Punjabi',
'pa-in': 'Punjabi',
'pa-arab-pk': 'Punjabi',
'ro': 'Romanian',
'ro-md': 'Romanian',
'ro-ro': 'Romanian',
'ru': 'Russian',
'ru-md': 'Russian',
'ru-ru': 'Russian',
// Serbian is split by script: a bare 'sr' falls into the Cyrillic group;
// only the explicit 'sr-latn*' codes map to Serbian_Latin.
'sr': 'Serbian_Cyrillic',
'sr-cyrl': 'Serbian_Cyrillic',
'sr-cyrl-ba': 'Serbian_Cyrillic',
'sr-cyrl-me': 'Serbian_Cyrillic',
'sr-cyrl-rs': 'Serbian_Cyrillic',
'sr-latn': 'Serbian_Latin',
'sr-latn-ba': 'Serbian_Latin',
'sr-latn-me': 'Serbian_Latin',
'sr-latn-rs': 'Serbian_Latin',
'sk': 'Slovak',
'sk-sk': 'Slovak',
'sl': 'Slovenian',
'sl-si': 'Slovenian',
'es': 'Spanish',
'es-ar': 'Spanish',
'es-bo': 'Spanish',
'es-cl': 'Spanish',
'es-co': 'Spanish',
'es-cr': 'Spanish',
'es-cu': 'Spanish',
'es-do': 'Spanish',
'es-ec': 'Spanish',
'es-sv': 'Spanish',
'es-gt': 'Spanish',
'es-hn': 'Spanish',
'es-419': 'Spanish',
'es-mx': 'Spanish',
'es-ni': 'Spanish',
'es-pa': 'Spanish',
'es-py': 'Spanish',
'es-pe': 'Spanish',
'es-pr': 'Spanish',
'es-es': 'Spanish',
'es-us': 'Spanish',
'es-uy': 'Spanish',
'es-ve': 'Spanish',
'sv': 'Swedish',
'sv-fi': 'Swedish',
'sv-se': 'Swedish',
'ta': 'Tamil',
'ta-in': 'Tamil',
'ta-lk': 'Tamil',
'te': 'Telugu',
'te-in': 'Telugu',
'th': 'Thai',
'th-th': 'Thai',
'tr': 'Turkish',
'tr-tr': 'Turkish',
'uk': 'Ukrainian',
'uk-ua': 'Ukrainian',
'ur': 'Urdu',
'ur-in': 'Urdu',
'ur-pk': 'Urdu',
'vi': 'Vietnamese',
'vi-vn': 'Vietnamese'
};
Loading

0 comments on commit b300d92

Please sign in to comment.