Skip to content

Commit

Permalink
Merge pull request #25 from jonahharris/with-topk-count-support-in-add
Browse files Browse the repository at this point in the history
Add support for count parameter in TopK add method.
  • Loading branch information
Callidon authored Jun 25, 2021
2 parents 680f534 + 37b6f2a commit df4b41f
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 5 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ interface TopkElement {

#### Methods

* `add(element: string) -> void`: add a new occurrence of an element to the sketch.
* `add(element: string, count: number = 1) -> void`: add one or more new occurrences of an element to the sketch.
* `values() -> Array<TopkElement>`: get the top-k values as an array of objects.
* `iterator() -> Iterator<TopkElement>`: get the top-k values as an iterator that yields objects.

Expand All @@ -366,11 +366,15 @@ const { TopK } = require('bloom-filters')
// create a new TopK with k = 10, an error rate of 0.001 and an accuracy of 0.99
const topk = new TopK(10, 0.001, 0.99)

// push some occurrences in the multiset
// push occurrences one-at-a-time in the multiset
topk.add('alice')
topk.add('bob')
topk.add('alice')

// or, equivalently, push multiple occurrences at once into the multiset
// topk.add('alice', 2)
// topk.add('bob', 1)

// print the top k values
for(let item of topk.values()) {
console.log(`Item "${item.value}" is in position ${item.rank} with an estimated frequency of ${item.frequency}`)
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "bloom-filters",
"version": "1.3.4",
"version": "1.3.5",
"description": "JS implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash",
"main": "dist/api.js",
"scripts": {
Expand Down
7 changes: 5 additions & 2 deletions src/sketch/topk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,11 @@ export default class TopK extends BaseFilter {
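* Add one or more occurrences of an element to the TopK
* @param element - Element to add
* @param count - Number of occurrences to add, must be > 0 (defaults to 1)
*/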
add (element: string): void {
this._sketch.update(element)
add (element: string, count: number = 1): void {
if (0 >= count) {
throw (`count must be > 0 (was ${count})`)
}
this._sketch.update(element, count)
const frequency = this._sketch.count(element)

if (this._heap.length < this._k || frequency >= this._heap.get(0)!.frequency) {
Expand Down
59 changes: 59 additions & 0 deletions test/topk-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,65 @@ describe('TopK', () => {

const expectedTop = ['alice', 'bob', 'carol']

describe('#add', () => {
  it('should produce equivalent TopK estimations when using count parameter', () => {
    const k = 3
    const errorRate = 0.001
    const accuracy = 0.999

    // Own-property check that ignores anything inherited from Object.prototype.
    const owns = (table, key) => Object.prototype.hasOwnProperty.call(table, key)

    // Reference sketch: each occurrence is inserted individually, while the
    // occurrences are tallied on the side to feed the batched sketch below.
    const oneByOne = new TopK(k, errorRate, accuracy)
    const tally = {}
    for (const item of lessThanOrEqualTestCaseItems) {
      oneByOne.add(item)
      const key = `${item}`
      tally[key] = owns(tally, item) ? tally[key] + 1 : 1
    }

    // Recount the same items independently and check that both tallies agree.
    const expectedTally = {}
    lessThanOrEqualTestCaseItems.forEach(item => {
      const key = `${item}`
      if (owns(expectedTally, item)) {
        ++expectedTally[key]
      } else {
        expectedTally[key] = 1
      }
    })
    tally.should.to.deep.equal(expectedTally)

    // Batched sketch: one add() call per distinct item, with its tally as count.
    const batched = new TopK(k, errorRate, accuracy)
    Object.entries(tally).forEach(([item, freq]) => {
      batched.add(item, freq)
    })

    const pickValue = entry => entry.value
    const oneByOneValues = oneByOne.values()
    const oneByOneKeys = oneByOneValues.map(pickValue)
    const batchedValues = batched.values()
    const batchedKeys = batchedValues.map(pickValue)

    // Lengths: the expected top has k entries and both sketches report as many.
    expectedTop.should.to.have.lengthOf(k)
    oneByOneKeys.should.to.have.lengthOf(expectedTop.length)
    batchedKeys.should.to.have.lengthOf(oneByOneKeys.length)

    // Keys: the reference sketch matches the expected top, the batched one matches the reference.
    oneByOneKeys.should.to.deep.equal(expectedTop)
    batchedKeys.should.to.deep.equal(oneByOneKeys)

    // Full entries returned by values() must be deeply equal as well.
    batchedValues.should.to.deep.equal(oneByOneValues)
  })
})

describe('#values', () => {
it('should produce valid TopK estimations when there are fewer than K items', () => {
const topk = new TopK(10, 0.001, 0.999)
Expand Down

0 comments on commit df4b41f

Please sign in to comment.