Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: cspell-tools - limit memory usage when build dictionaries #2087

Merged
merged 1 commit into from
Dec 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cspell-dict.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ repo
repos
retryable
serializers
sigs
specberus
streetsidesoftware
submodule
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-trie-lib/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"tsbuildinfo"
],
"import": [
"../../cspell.json",
"@cspell/dict-es-es/cspell-ext.json"
]
}
48 changes: 47 additions & 1 deletion packages/cspell-trie-lib/src/lib/TrieBuilder.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import { countNodes, isCircular } from './util';
import { TrieBuilder, buildTrie } from './TrieBuilder';
import { TrieBuilder, buildTrie, __testing__ } from './TrieBuilder';
import { TrieNode } from '.';

const { trimSignatures, trimMap } = __testing__;

describe('Validate TrieBuilder', () => {
test('builder explicit consolidateSuffixes', () => {
Expand Down Expand Up @@ -31,6 +34,49 @@ describe('Validate TrieBuilder', () => {
const trie = buildTrie(sampleWords);
expect([...trie.words()]).toEqual(sampleWords.sort());
});

test('trimSignatures', () => {
const n: TrieNode = {};
const sigs = sampleWords;
const soloSigs = sigs.filter((_, i) => !!(i & 1));
const signatures = new Map(sigs.map((w) => [w, n]));
const solo = new Set(soloSigs);

// verify preconditions
expect(signatures.size).toBe(sigs.length);
expect(solo.size).toBe(soloSigs.length);

// Nothing should change, solo is within bounds.
trimSignatures(signatures, solo, sampleWords.length);
expect(signatures.size).toBe(sigs.length);
expect(solo.size).toBe(soloSigs.length);

// trim and make sure the newest values are left.
trimSignatures(signatures, solo, 5, 10);
expect(signatures.size).toBe(sigs.length - soloSigs.length + 5);
expect(solo.size).toBe(5);
// verify newest are left
expect([...solo]).toEqual(soloSigs.slice(-5));
});

test('trimMap', () => {
const n: TrieNode = {};
const values = sampleWords;
const mapOfValues = new Map(values.map((w) => [w, n]));

// verify preconditions
expect(mapOfValues.size).toBe(values.length);

// Nothing should change, solo is within bounds.
trimMap(mapOfValues, sampleWords.length);
expect(mapOfValues.size).toBe(values.length);

// trim and make sure the newest values are left.
trimMap(mapOfValues, 5, 10);
expect(mapOfValues.size).toBe(5);
// verify newest are left
expect([...mapOfValues.keys()]).toEqual(values.slice(-5));
});
});

const sampleWords = [
Expand Down
48 changes: 47 additions & 1 deletion packages/cspell-trie-lib/src/lib/TrieBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,15 @@ interface PathNode {
n: TrieNode;
}

// cspell:words sigs
const MAX_NUM_SOLO_SIGS = 100000;
const MAX_TRANSFORMS = 1000000;
const CACHE_PADDING = 1000;

export class TrieBuilder {
private count = 0;
private readonly signatures = new Map<string, TrieNode>();
private readonly soloSignatures = new Set<string>();
private readonly cached = new Map<TrieNode, number>();
private readonly transforms = new Map<TrieNode, Map<string, TrieNode>>();
private _eow: TrieNode = Object.freeze({ f: 1 });
Expand Down Expand Up @@ -100,9 +106,11 @@ export class TrieBuilder {
const sig = this.signature(n);
const ref = this.signatures.get(sig);
if (ref !== undefined) {
this.soloSignatures.delete(sig);
return this.tryCacheFrozen(ref);
}

this.soloSignatures.add(sig);
trimSignatures(this.signatures, this.soloSignatures, MAX_NUM_SOLO_SIGS);
this.signatures.set(sig, this.freeze(n));
return n;
}
Expand All @@ -111,6 +119,7 @@ export class TrieBuilder {
if (!Object.isFrozen(result) || !Object.isFrozen(src)) return;
const t = this.transforms.get(src) ?? new Map<string, TrieNode>();
t.set(s, result);
trimMap(this.transforms, MAX_TRANSFORMS);
this.transforms.set(src, t);
}

Expand Down Expand Up @@ -209,6 +218,10 @@ export class TrieBuilder {
this._root = createTrieRoot(this.trieOptions);
this.cached.clear();
this.signatures.clear();
this.signatures.set(this.signature(this._eow), this._eow);
this.soloSignatures.clear();
this.count = 0;
this.cached.set(this._eow, this.count++);
}

build(consolidateSuffixes = false): Trie {
Expand All @@ -224,3 +237,36 @@ function copyIfFrozen(n: TrieNode): TrieNode {
const c = n.c ? new Map(n.c) : undefined;
return { f: n.f, c };
}

function trimSignatures(
signatures: Map<string, TrieNode>,
soloSignatures: Set<string>,
size: number,
padding = CACHE_PADDING
): void {
if (soloSignatures.size >= size + padding) {
for (const soloSig of soloSignatures) {
signatures.delete(soloSig);
soloSignatures.delete(soloSig);
if (soloSignatures.size <= size) {
break;
}
}
}
}

function trimMap(map: Map<unknown, unknown>, size: number, padding = CACHE_PADDING) {
if (map.size >= size + padding) {
for (const key of map.keys()) {
map.delete(key);
if (map.size <= size) {
break;
}
}
}
}

export const __testing__ = {
trimSignatures,
trimMap,
};
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ export { TrieNode, FLAG_WORD, ChildMap, TrieRoot } from './TrieNode';
export * from './util';
export * from './walker';
export * from './importExport';
export * from './TrieBuilder';
export { buildTrie, buildTrieFast, TrieBuilder } from './TrieBuilder';
export * from './consolidate';
export { SuggestionResult, MaxCost, suggestionCollector, SuggestionCollector } from './suggestCollector';
export { parseDictionaryLines, parseDictionary } from './SimpleDictionaryParser';