Skip to content

Commit

Permalink
Fix to off-by-one for TopK and related test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
jonahharris committed Apr 19, 2021
1 parent bf2d2bb commit d22cab2
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 41 deletions.
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "bloom-filters",
"version": "1.3.3",
"version": "1.3.4",
"description": "JS implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash",
"main": "dist/api.js",
"scripts": {
Expand All @@ -25,7 +25,8 @@
],
"author": "Thomas Minier <[email protected]>",
"contributors": [
"Arnaud Grall <[email protected]>"
"Arnaud Grall <[email protected]>",
"Jonah H. Harris <[email protected]>"
],
"license": "MIT",
"bugs": {
Expand Down
4 changes: 2 additions & 2 deletions src/sketch/topk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ export default class TopK extends BaseFilter {
*/
values (): TopkElement[] {
const res = []
for (let i = this._heap.length - 1; i > 0; i--) {
for (let i = this._heap.length - 1; i >= 0; i--) {
const elt = this._heap.get(i)!
res.push({
value: elt.value,
Expand All @@ -231,7 +231,7 @@ export default class TopK extends BaseFilter {
iterator (): Iterator<TopkElement> {
const heap = this._heap
return function *() {
for (let i = heap.length - 1; i > 0; i--) {
for (let i = heap.length - 1; i >= 0; i--) {
const elt = heap.get(i)!
yield {
value: elt.value,
Expand Down
115 changes: 78 additions & 37 deletions test/topk-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,48 +24,73 @@ SOFTWARE.

'use strict'


require('chai').should()
const { TopK } = require('../dist/api.js')

describe('TopK', () => {
const lessThanOrEqualTestCaseItems = [
'alice', 'bob', 'alice', 'carol',
'bob', 'alice'
]

const moreThanTestCaseItems = [
'alice', 'daniel', 'esther', 'bob',
'alice', 'bob', 'alice', 'carol',
'carol', 'alice', 'bob'
]

const expectedTop = ['alice', 'bob', 'carol']

describe('#values', () => {
it('should produce valid TopK estimations', () => {
const topk = new TopK(3, 0.001, 0.999)
topk.add('alice')
topk.add('bob')
topk.add('alice')
topk.add('carol')
topk.add('bob')
topk.add('alice')
it('should produce valid TopK estimations when there are fewer than K items', () => {
const topk = new TopK(10, 0.001, 0.999)
for (let item of lessThanOrEqualTestCaseItems) {
topk.add(item)
}

let i = 0
let prev = { frequency: Infinity }
for (let current of topk.iterator()) {
for (let current of topk.values()) {
current.should.have.all.keys('value', 'rank', 'frequency')
current.value.should.equal(expectedTop[i])
current.frequency.should.be.below(prev.frequency)
current.rank.should.equal(i + 1)
prev = current
i++
}

i.should.equal(expectedTop.length)
})

it('should produce valid estimations when there are more than K items', () => {
it('should produce valid TopK estimations when there are exactly K items', () => {
const topk = new TopK(3, 0.001, 0.999)
topk.add('alice')
topk.add('daniel')
topk.add('esther')
topk.add('bob')
topk.add('alice')
topk.add('bob')
topk.add('alice')
topk.add('carol')
topk.add('carol')
topk.add('alice')
for (let item of lessThanOrEqualTestCaseItems) {
topk.add(item)
}

let i = 0
let prev = { frequency: Infinity }
for (let current of topk.values()) {
current.should.have.all.keys('value', 'rank', 'frequency')
current.value.should.equal(expectedTop[i])
current.frequency.should.be.below(prev.frequency)
current.rank.should.equal(i + 1)
prev = current
i++
}

i.should.equal(expectedTop.length)
})

it('should produce valid TopK estimations when there are more than K items', () => {
const topk = new TopK(3, 0.001, 0.999)
for (let item of moreThanTestCaseItems) {
topk.add(item)
}

let i = 0
let prev = { frequency: Infinity }
for (let current of topk.values()) {
current.should.have.all.keys('value', 'rank', 'frequency')
current.value.should.equal(expectedTop[i])
Expand All @@ -74,18 +99,37 @@ describe('TopK', () => {
prev = current
i++
}

i.should.equal(expectedTop.length)
})
})

describe('#iterator', () => {
it('should produce valid TopK estimations', () => {
it('should produce valid TopK estimations when there are fewer than K items', () => {
const topk = new TopK(10, 0.001, 0.999)
for (let item of lessThanOrEqualTestCaseItems) {
topk.add(item)
}

let i = 0
let prev = { frequency: Infinity }
for (let current of topk.iterator()) {
current.should.have.all.keys('value', 'rank', 'frequency')
current.value.should.equal(expectedTop[i])
current.frequency.should.be.below(prev.frequency)
current.rank.should.equal(i + 1)
prev = current
i++
}

i.should.equal(expectedTop.length)
})

it('should produce valid TopK estimations when there are exactly K items', () => {
const topk = new TopK(3, 0.001, 0.999)
topk.add('alice')
topk.add('bob')
topk.add('alice')
topk.add('carol')
topk.add('bob')
topk.add('alice')
for (let item of lessThanOrEqualTestCaseItems) {
topk.add(item)
}

let i = 0
let prev = { frequency: Infinity }
Expand All @@ -97,20 +141,15 @@ describe('TopK', () => {
prev = current
i++
}

i.should.equal(expectedTop.length)
})

it('should produce valid estimations when there are more than K items', () => {
const topk = new TopK(3, 0.001, 0.999)
topk.add('alice')
topk.add('daniel')
topk.add('esther')
topk.add('bob')
topk.add('alice')
topk.add('bob')
topk.add('alice')
topk.add('carol')
topk.add('carol')
topk.add('alice')
for (let item of moreThanTestCaseItems) {
topk.add(item)
}

let i = 0
let prev = { frequency: Infinity }
Expand All @@ -122,6 +161,8 @@ describe('TopK', () => {
prev = current
i++
}

i.should.equal(expectedTop.length)
})
})

Expand Down

0 comments on commit d22cab2

Please sign in to comment.