From f27d3c68ad454719d71724f92693db57270827b9 Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Mon, 31 Oct 2022 09:00:32 +0100 Subject: [PATCH] fix: Do not generate duplicate entries by default (#3793) Default `stripCaseAndAccentsKeepDuplicate` to `false` because it uses too much memory. --- .../src/compiler/__snapshots__/compile.test.ts.snap | 8 -------- .../cspell-tools/src/compiler/wordListCompiler.test.ts | 2 +- packages/cspell-tools/src/compiler/wordListParser.test.ts | 6 +++--- .../src/lib/SimpleDictionaryParser.test.ts | 4 ++-- .../cspell-trie-lib/src/lib/SimpleDictionaryParser.ts | 4 ++-- .../src/lib/suggestions/walker/hintedWalker.test.ts | 2 +- packages/cspell-trie-lib/src/lib/trie.test.ts | 2 +- 7 files changed, 10 insertions(+), 18 deletions(-) diff --git a/packages/cspell-tools/src/compiler/__snapshots__/compile.test.ts.snap b/packages/cspell-tools/src/compiler/__snapshots__/compile.test.ts.snap index cba89b17b1e..43dee45a46d 100644 --- a/packages/cspell-tools/src/compiler/__snapshots__/compile.test.ts.snap +++ b/packages/cspell-tools/src/compiler/__snapshots__/compile.test.ts.snap @@ -120,22 +120,14 @@ Code+ Error Error+ msg -~!codecode ~!codemsg -~!err ~!errorerror -~+code -~+code+ -~+error -~+error+ -~+msg ~cafe ~café ~code ~code+ ~error ~error+ -~msg " `; diff --git a/packages/cspell-tools/src/compiler/wordListCompiler.test.ts b/packages/cspell-tools/src/compiler/wordListCompiler.test.ts index 1d90ed94a23..1bc5432054e 100644 --- a/packages/cspell-tools/src/compiler/wordListCompiler.test.ts +++ b/packages/cspell-tools/src/compiler/wordListCompiler.test.ts @@ -111,7 +111,7 @@ describe('Validate the wordListCompiler', () => { const destName = path.join(temp, 'example0.txt'); await compileWordList(source, destName, compileOpt(false)); const output = await fsp.readFile(destName, 'utf8'); - expect(output).toBe(__testing__.wordListHeader + '\n' + 'hello\n~hello\ntry\n~try\nwork\n~work\n'); + expect(output).toBe(__testing__.wordListHeader + '\n' + 'hello\ntry\nwork\n'); expect(consoleOutput()).toMatchSnapshot(); }); diff --git a/packages/cspell-tools/src/compiler/wordListParser.test.ts b/packages/cspell-tools/src/compiler/wordListParser.test.ts index f922e6b4241..1e39efc1695 100644 --- a/packages/cspell-tools/src/compiler/wordListParser.test.ts +++ b/packages/cspell-tools/src/compiler/wordListParser.test.ts @@ -22,9 +22,9 @@ describe('Validate the wordListCompiler', () => { test.each` lines | sort | expectedResult - ${'banana|Apple|Apple|apple'} | ${true} | ${'Apple|apple|banana|~apple|~banana'} - ${'banana|Apple|Apple|apple|banana'} | ${false} | ${'banana|~banana|Apple|~apple|apple'} - ${'hello'} | ${true} | ${'hello|~hello'} + ${'banana|Apple|Apple|apple'} | ${true} | ${'Apple|apple|banana|~apple'} + ${'banana|Apple|Apple|apple|banana'} | ${false} | ${'banana|Apple|~apple|apple'} + ${'hello'} | ${true} | ${'hello'} ${'!Hello'} | ${true} | ${'!Hello|~!hello'} `('createSortAndFilterOperation $lines $sort', ({ lines, expectedResult, sort }) => { const normalizer = normalizeTargetWords({ sort, generateNonStrict: true }); diff --git a/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts b/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts index 44631a402a2..e138458fb58 100644 --- a/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts +++ b/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts @@ -152,7 +152,7 @@ describe('Validate SimpleDictionaryParser', () => { ${s('Word')} | ${s('Word|~word')} ${s('*error*')} | ${s('error|~error|error+|~error+|+error|~+error|+error+|~+error+')} `('parseDictionaryLines simple $lines', ({ lines, expected }) => { - const r = [...parseDictionaryLines(lines)]; + const r = [...parseDictionaryLines(lines, { stripCaseAndAccentsKeepDuplicate: true })]; expect(r).toEqual(expected); }); @@ -165,7 +165,7 @@ describe('Validate SimpleDictionaryParser', () => { ${s('# cspell-dictionary: generate-alternatives|Apple|Arizona|New York')} | ${{ stripCaseAndAccents: false }} | ${s('Apple|~apple|Arizona|~arizona|New York|~new york')} ${s('Apple| # cspell-dictionary: no-generate-alternatives|Arizona|New York')} | ${{}} | ${s('Apple|~apple|Arizona|New York')} ${dictionary3()} | ${{ stripCaseAndAccentsKeepDuplicate: false }} | ${s('Error|~error|Error+|~error+|+error|+error+|Code|~code|Code+|~code+|+code|+code+|msg|+msg|!err|!Errorerror|!Codemsg|Café|~café|~cafe|!codecode')} - ${s('# cspell-dictionary: split|"New York"|Tower of London')} | ${{}} | ${s('New York|Tower|~tower|of|~of|London|~london')} + ${s('# cspell-dictionary: split|"New York"|Tower of London')} | ${{ stripCaseAndAccentsKeepDuplicate: true }} | ${s('New York|Tower|~tower|of|~of|London|~london')} ${s('Hello|!Goodbye')} | ${{}} | ${s('Hello|~hello|!Goodbye')} ${s('Hello|!Goodbye')} | ${{ stripCaseAndAccentsOnForbidden: true }} | ${s('Hello|~hello|!Goodbye|~!goodbye')} `('parseDictionaryLines complex $lines', ({ lines, options, expected }) => { diff --git a/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts b/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts index ef79284b2f8..3b0f2c90a2b 100644 --- a/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts +++ b/packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts @@ -36,7 +36,7 @@ export interface ParseDictionaryOptions { /** * Tell the parser to keep non-case/accent version in both forms. - * @default true + * @default false */ stripCaseAndAccentsKeepDuplicate: boolean; @@ -74,7 +74,7 @@ const _defaultOptions: ParseDictionaryOptions = { caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX, keepExactPrefix: IDENTITY_PREFIX, stripCaseAndAccents: true, - stripCaseAndAccentsKeepDuplicate: true, + stripCaseAndAccentsKeepDuplicate: false, stripCaseAndAccentsOnForbidden: false, split: false, splitKeepBoth: false, diff --git a/packages/cspell-trie-lib/src/lib/suggestions/walker/hintedWalker.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/walker/hintedWalker.test.ts index d7fdf92de69..15ff38c8010 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/walker/hintedWalker.test.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/walker/hintedWalker.test.ts @@ -45,7 +45,7 @@ describe('Validate Util Functions', () => { test.each` dict | ignoreCase | sep | depth | compoundMethod | expected - ${['A*', '+a*', '*b*', '+c']} | ${true} | ${''} | ${2} | ${undefined} | ${['A', 'Aa', 'Ab', 'Ac', 'Aa', 'Ab', 'Ac', 'b', 'ba', 'bb', 'bc', 'ba', 'bb', 'bc', 'a', 'aa', 'ab', 'ac', 'aa', 'ab', 'ac', 'b', 'ba', 'bb', 'bc', 'ba', 'bb', 'bc']} + ${['A*', '+a*', '*b*', '+c']} | ${true} | ${''} | ${2} | ${undefined} | ${['A', 'Aa', 'Ab', 'Ac', 'b', 'ba', 'bb', 'bc', 'a', 'aa', 'ab', 'ac']} ${['A*', '+a*', '*b*', '+c']} | ${false} | ${''} | ${2} | ${undefined} | ${['A', 'Aa', 'Ab', 'Ac', 'b', 'ba', 'bb', 'bc']} ${['A*', '+b+', '+C']} | ${false} | ${'•'} | ${3} | ${CompoundWordsMethod.NONE} | ${['A', 'A•C', 'A•b•C']} ${['A*', '+b+', '+C']} | ${false} | ${'•'} | ${3} | ${CompoundWordsMethod.JOIN_WORDS} | ${['A', 'A•C', 'A•b•C', 'A+A', 'A+C']} diff --git a/packages/cspell-trie-lib/src/lib/trie.test.ts b/packages/cspell-trie-lib/src/lib/trie.test.ts index e3eaf91c4e4..c612f920545 100644 --- a/packages/cspell-trie-lib/src/lib/trie.test.ts +++ b/packages/cspell-trie-lib/src/lib/trie.test.ts @@ -166,7 +166,7 @@ describe('Validate Trie Class', () => { `); expect(trieModern.isSizeKnown()).toBe(false); - expect(trieModern.size()).toBe(11); // begin, begin+, end, +end, café ~cafe + expect(trieModern.size()).toBe(6); // begin, begin+, end, +end, café ~cafe expect(trieModern.isSizeKnown()).toBe(true); });