diff --git a/package.json b/package.json index 95ddac1..b9181b1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "bloom-filters", - "version": "1.3.3", + "version": "1.3.4", "description": "JS implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash", "main": "dist/api.js", "scripts": { @@ -25,7 +25,8 @@ ], "author": "Thomas Minier ", "contributors": [ - "Arnaud Grall " + "Arnaud Grall ", + "Jonah H. Harris " ], "license": "MIT", "bugs": { diff --git a/src/sketch/topk.ts b/src/sketch/topk.ts index 69d1059..69b791b 100644 --- a/src/sketch/topk.ts +++ b/src/sketch/topk.ts @@ -211,7 +211,7 @@ export default class TopK extends BaseFilter { */ values (): TopkElement[] { const res = [] - for (let i = this._heap.length - 1; i > 0; i--) { + for (let i = this._heap.length - 1; i >= 0; i--) { const elt = this._heap.get(i)! res.push({ value: elt.value, @@ -231,7 +231,7 @@ export default class TopK extends BaseFilter { iterator (): Iterator { const heap = this._heap return function *() { - for (let i = heap.length - 1; i > 0; i--) { + for (let i = heap.length - 1; i >= 0; i--) { const elt = heap.get(i)! yield { value: elt.value, diff --git a/test/topk-test.js b/test/topk-test.js index 08289ee..65d5b78 100644 --- a/test/topk-test.js +++ b/test/topk-test.js @@ -24,48 +24,73 @@ SOFTWARE. 'use strict' + require('chai').should() const { TopK } = require('../dist/api.js') describe('TopK', () => { + const lessThanOrEqualTestCaseItems = [ + 'alice', 'bob', 'alice', 'carol', + 'bob', 'alice' + ] + + const moreThanTestCaseItems = [ + 'alice', 'daniel', 'esther', 'bob', + 'alice', 'bob', 'alice', 'carol', + 'carol', 'alice', 'bob' + ] + const expectedTop = ['alice', 'bob', 'carol'] describe('#values', () => { - it('should produce valid TopK estimations', () => { - const topk = new TopK(3, 0.001, 0.999) - topk.add('alice') - topk.add('bob') - topk.add('alice') - topk.add('carol') - topk.add('bob') - topk.add('alice') + it('should produce valid TopK estimations when there are fewer than K items', () => { + const topk = new TopK(10, 0.001, 0.999) + for (let item of lessThanOrEqualTestCaseItems) { + topk.add(item) + } let i = 0 let prev = { frequency: Infinity } - for (let current of topk.iterator()) { + for (let current of topk.values()) { + current.should.have.all.keys('value', 'rank', 'frequency') current.value.should.equal(expectedTop[i]) current.frequency.should.be.below(prev.frequency) current.rank.should.equal(i + 1) prev = current i++ } + + i.should.equal(expectedTop.length) }) - it('should produce valid estimations when there are more than K items', () => { + it('should produce valid TopK estimations when there are exactly K items', () => { const topk = new TopK(3, 0.001, 0.999) - topk.add('alice') - topk.add('daniel') - topk.add('esther') - topk.add('bob') - topk.add('alice') - topk.add('bob') - topk.add('alice') - topk.add('carol') - topk.add('carol') - topk.add('alice') + for (let item of lessThanOrEqualTestCaseItems) { + topk.add(item) + } + let i = 0 let prev = { frequency: Infinity } + for (let current of topk.values()) { + current.should.have.all.keys('value', 'rank', 'frequency') + current.value.should.equal(expectedTop[i]) + current.frequency.should.be.below(prev.frequency) + current.rank.should.equal(i + 1) + prev = current + i++ + } + + i.should.equal(expectedTop.length) + }) + + it('should produce valid TopK estimations when there are more than K items', () => { + const topk = new TopK(3, 0.001, 0.999) + for (let item of moreThanTestCaseItems) { + topk.add(item) + } + let i = 0 + let prev = { frequency: Infinity } for (let current of topk.values()) { current.should.have.all.keys('value', 'rank', 'frequency') current.value.should.equal(expectedTop[i]) @@ -74,18 +99,37 @@ describe('TopK', () => { prev = current i++ } + + i.should.equal(expectedTop.length) }) }) describe('#iterator', () => { - it('should produce valid TopK estimations', () => { + it('should produce valid TopK estimations when there are fewer than K items', () => { + const topk = new TopK(10, 0.001, 0.999) + for (let item of lessThanOrEqualTestCaseItems) { + topk.add(item) + } + + let i = 0 + let prev = { frequency: Infinity } + for (let current of topk.iterator()) { + current.should.have.all.keys('value', 'rank', 'frequency') + current.value.should.equal(expectedTop[i]) + current.frequency.should.be.below(prev.frequency) + current.rank.should.equal(i + 1) + prev = current + i++ + } + + i.should.equal(expectedTop.length) + }) + + it('should produce valid TopK estimations when there are exactly K items', () => { const topk = new TopK(3, 0.001, 0.999) - topk.add('alice') - topk.add('bob') - topk.add('alice') - topk.add('carol') - topk.add('bob') - topk.add('alice') + for (let item of lessThanOrEqualTestCaseItems) { + topk.add(item) + } let i = 0 let prev = { frequency: Infinity } @@ -97,20 +141,15 @@ describe('TopK', () => { prev = current i++ } + + i.should.equal(expectedTop.length) }) it('should produce valid estimations when there are more than K items', () => { const topk = new TopK(3, 0.001, 0.999) - topk.add('alice') - topk.add('daniel') - topk.add('esther') - topk.add('bob') - topk.add('alice') - topk.add('bob') - topk.add('alice') - topk.add('carol') - topk.add('carol') - topk.add('alice') + for (let item of moreThanTestCaseItems) { + topk.add(item) + } let i = 0 let prev = { frequency: Infinity } @@ -122,6 +161,8 @@ describe('TopK', () => { prev = current i++ } + + i.should.equal(expectedTop.length) }) })