Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Contentful/keyword match improvements #352

Merged
merged 7 commits into from
Nov 4, 2019
2 changes: 1 addition & 1 deletion packages/botonic-plugin-contentful/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/botonic-plugin-contentful/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"postversion": "git push && git push --tags"
},
"name": "@botonic/plugin-contentful",
"version": "0.9.24",
"version": "0.9.25",
"main": "lib/index.js",
"types": "lib/index.d.ts",
"repository": {
Expand Down
2 changes: 1 addition & 1 deletion packages/botonic-plugin-contentful/src/contentful/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
TopContent
} from '../cms';
import { ButtonDelivery } from './button';
import { DeliveryApi} from './delivery-api';
import { DeliveryApi } from './delivery-api';
import { CarouselDelivery } from './carousel';
import { StartUpDelivery } from './startup';
import { TextDelivery } from './text';
Expand Down
80 changes: 23 additions & 57 deletions packages/botonic-plugin-contentful/src/nlp/keywords.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,26 @@ import { SimilarWordFinder, SimilarWordResult } from './similar-words';
import { NormalizedUtterance, Normalizer } from './normalizer';
import { Locale } from './locales';

/**
* May contain multiple words
* TODO consider storing as a list of new Token class instances, each with raw and stem fields
*/
export class Keyword {
readonly raw: string;
constructor(
readonly raw: string,
raw: string,
readonly stemmed: string,
readonly hasOnlyStopWords: boolean
) {}
) {
this.raw = raw.trim().toLowerCase();
}

splitInWords(): Keyword[] {
if (this.hasOnlyStopWords) {
return this.raw.split(' ').map(w => new Keyword(w, w, true));
}
return this.stemmed.split(' ').map(w => new Keyword(w, w, false));
}
}

export class CandidateWithKeywords<M> {
Expand All @@ -33,7 +47,7 @@ export enum SortType {
export class KeywordsOptions {
constructor(
readonly maxDistance = 1,
readonly similarWordsMinLength = 3,
readonly similarWordsMinMatchLength = 3,
readonly resultsSortType = SortType.LENGTH
) {}
}
Expand All @@ -50,7 +64,7 @@ export class KeywordsParser<M> {
) {
this.similar = new SimilarWordFinder<M>(
true,
options.similarWordsMinLength
options.similarWordsMinMatchLength
);
}

Expand Down Expand Up @@ -80,66 +94,18 @@ export class KeywordsParser<M> {
findCandidatesWithKeywordsAt(
utterance: NormalizedUtterance
): SimilarWordResult<M>[] {
let results: SimilarWordResult<M>[] = [];
switch (this.matchType) {
case MatchType.ONLY_KEYWORDS_FOUND:
results = this.similar.findSimilarKeyword(
utterance,
this.options.maxDistance
);
break;
case MatchType.KEYWORDS_AND_OTHERS_FOUND:
results = this.similar.findSubstring(
utterance,
this.options.maxDistance
);
break;
case MatchType.ALL_WORDS_IN_KEYWORDS_MIXED_UP:
results = this.mixedUp(utterance);
}
const results: SimilarWordResult<M>[] = this.similar.find(
this.matchType,
utterance,
this.options.maxDistance
);
return this.sort(results);
}

private mixedUp(utterance: NormalizedUtterance) {
if (this.options.maxDistance > 0) {
throw new Error(
'ALL_WORDS_IN_KEYWORDS_MIXED_UP does not support distance> 0'
);
}
const results: SimilarWordResult<M>[] = [];
for (const candidate of this.candidates) {
for (const keyword of candidate.keywords) {
if (this.containsAllWordsInKeyword(utterance, keyword)) {
results.push(
new SimilarWordResult<M>(
candidate.owner,
keyword,
keyword.stemmed,
0
)
);
}
}
}
return results;
}

private sort(results: SimilarWordResult<M>[]) {
if (this.options.resultsSortType === SortType.NONE) {
return results;
}
return results.sort((r1, r2) => r2.match.length - r1.match.length);
}

private containsAllWordsInKeyword(
utterance: NormalizedUtterance,
keyword: Keyword
): boolean {
for (const word of keyword.stemmed.split(' ')) {
if (!utterance.joinedStems.includes(word)) {
return false;
}
}
return true;
}
}
4 changes: 2 additions & 2 deletions packages/botonic-plugin-contentful/src/nlp/normalizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export class NormalizedUtterance {
*
* @param raw
* @param tokens lowercase, with i18n characters converted to ascii
* @param stems lowercase, stemmed
* @param stems lowercase, stemmed. Equal to tokens if onlyStopWords==true
* @param onlyStopWords tokens are all stop words
*/
constructor(
Expand Down Expand Up @@ -101,7 +101,7 @@ export class Normalizer {
stems = stems.concat(...tokenStems);
}
if (stems.length == 0) {
console.log(`'${txt}' only contains stopwords. Not removing them`);
// console.log(`'${txt}' only contains stopwords. Not removing them`);
return new NormalizedUtterance(txt, tokens, tokens, true);
}
return new NormalizedUtterance(txt, tokens, stems, false);
Expand Down
Loading