Skip to content

Commit

Permalink
Add Ring[BigDecimal], modeled after Ring[BigInt]
Browse files Browse the repository at this point in the history
 (Added a custom generator for BigDecimals, to reduce somewhat the risk of underflow)
  • Loading branch information
Cyrille Chépélov (TP12) committed Oct 19, 2016
1 parent e2fe3e2 commit d83922c
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import CMSFunctions.generateHashes
* We benchmark different `K` types as well as different input data streams.
*/
object CMSBenchmark {

import CMSHasherImplicits.CMSHasherBigInt

@State(Scope.Benchmark)
Expand All @@ -36,24 +35,34 @@ object CMSBenchmark {
// Benchmark input vectors. They are declared uninitialised here and assigned in setup().
var smallLongs: Vector[Long] = _
var smallBigInts: Vector[BigInt] = _
var largeBigInts: Vector[BigInt] = _
var smallBigDecimals: Vector[BigDecimal] = _
var largeBigDecimals: Vector[BigDecimal] = _
var largeStrings: Vector[String] = _

// The monoids are built from `eps` and `delta`, which are not available yet at this point,
// so they are assigned in setup() as well.
var longMonoid: CMSMonoid[Long] = _
var bigIntMonoid: CMSMonoid[BigInt] = _
var bigDecimalMonoid: CMSMonoid[BigDecimal] = _
var stringMonoid: CMSMonoid[String] = _

/**
 * Builds the CMS monoids and the input vectors used by the benchmarks.
 *
 * Annotated with `Level.Trial`, so JMH invokes it once per trial. Every state field that the
 * benchmark methods read is assigned here.
 */
@Setup(Level.Trial)
def setup(): Unit = {
  longMonoid = CMS.monoid[Long](eps, delta, Seed)
  bigIntMonoid = CMS.monoid[BigInt](eps, delta, Seed)
  bigDecimalMonoid = CMS.monoid[BigDecimal](eps, delta, Seed)
  stringMonoid = CMS.monoid[String](eps, delta, Seed)

  val bitsPerChar = 16
  largeStrings = (1 to size).map(_ => nextString(MaxBits / bitsPerChar)).toVector
  // `largeStrings` is already a Vector, so mapping it needs no extra `.toVector`.
  largeBigInts = largeStrings.map(s => BigInt(s.getBytes))
  largeBigDecimals = largeStrings.map { s =>
    // The first character picks a decimal exponent in [-128, 127]; the remaining characters
    // provide the digits.
    val md = (s.head % 256) - 128
    // The base must be 10: `BigDecimal(1).pow(md)` is always 1 and would leave `md` without effect.
    BigDecimal(BigInt(s.tail.getBytes)) * BigDecimal(10).pow(md)
  }

  smallLongs = (1 to size).map(_.toLong).toVector
  smallBigInts = (1 to size).map(BigInt(_)).toVector
  // Adds a 10^-size fraction so the values are not plain integers (base 1 would just add 1).
  // NOTE(review): the sum may be rounded to the default MathContext for large `size` - confirm
  // that the resulting values are what the benchmark should measure.
  smallBigDecimals = (1 to size).map(BigDecimal(_) + BigDecimal(10).pow(-size)).toVector
}

}
Expand All @@ -77,6 +86,14 @@ class CMSBenchmark {
def sumLargeBigIntCms(st: CMSState): CMS[BigInt] =
sumCmsVector(st.largeBigInts, st.bigIntMonoid)

/** Benchmark: passes the small BigDecimal inputs and the BigDecimal monoid to `sumCmsVector`. */
@Benchmark
def sumSmallBigDecimalCms(st: CMSState): CMS[BigDecimal] = {
  val inputs = st.smallBigDecimals
  val monoid = st.bigDecimalMonoid
  sumCmsVector(inputs, monoid)
}

/** Benchmark: passes the large BigDecimal inputs and the BigDecimal monoid to `sumCmsVector`. */
@Benchmark
def sumLargeBigDecimalCms(st: CMSState): CMS[BigDecimal] = {
  val inputs = st.largeBigDecimals
  val monoid = st.bigDecimalMonoid
  sumCmsVector(inputs, monoid)
}

/** Benchmark: passes the large String inputs and the String monoid to `sumCmsVector`. */
@Benchmark
def sumLargeStringCms(st: CMSState): CMS[String] = {
  val inputs = st.largeStrings
  val monoid = st.stringMonoid
  sumCmsVector(inputs, monoid)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,33 @@ object TopCMSBenchmark {
// Benchmark input vectors. They are declared uninitialised here and assigned in setup().
var smallLongs: Vector[Long] = _
var smallBigInts: Vector[BigInt] = _
var largeBigInts: Vector[BigInt] = _
var smallBigDecimals: Vector[BigDecimal] = _
var largeBigDecimals: Vector[BigDecimal] = _
var largeStrings: Vector[String] = _

// One TopPctCMS monoid per key type; these are also assigned in setup().
var cmsLongMonoid: TopPctCMSMonoid[Long] = _
var cmsBigIntMonoid: TopPctCMSMonoid[BigInt] = _
var cmsBigDecimalMonoid: TopPctCMSMonoid[BigDecimal] = _
var cmsStringMonoid: TopPctCMSMonoid[String] = _

/**
 * Builds the TopPctCMS monoids and the input vectors used by the benchmarks.
 *
 * Annotated with `Level.Trial`, so JMH invokes it once per trial. Every state field that the
 * benchmark methods read is assigned here.
 */
@Setup(Level.Trial)
def setup(): Unit = {
  cmsLongMonoid = TopPctCMS.monoid[Long](eps, delta, Seed, pct)
  cmsBigIntMonoid = TopPctCMS.monoid[BigInt](eps, delta, Seed, pct)
  cmsBigDecimalMonoid = TopPctCMS.monoid[BigDecimal](eps, delta, Seed, pct)
  cmsStringMonoid = TopPctCMS.monoid[String](eps, delta, Seed, pct)

  val bitsPerChar = 16
  largeStrings = (1 to size).map(_ => nextString(MaxBits / bitsPerChar)).toVector
  // `largeStrings` is already a Vector, so mapping it needs no extra `.toVector`.
  largeBigInts = largeStrings.map(s => BigInt(s.getBytes))
  largeBigDecimals = largeStrings.map { s =>
    // The first character picks a decimal exponent in [-128, 127]; the remaining characters
    // provide the digits.
    val md = (s.head % 256) - 128
    // The base must be 10: `BigDecimal(1).pow(md)` is always 1 and would leave `md` without effect.
    BigDecimal(BigInt(s.tail.getBytes)) * BigDecimal(10).pow(md)
  }

  smallLongs = (1 to size).map(_.toLong).toVector
  smallBigInts = (1 to size).map(BigInt(_)).toVector
  // Adds a 10^-size fraction so the values are not plain integers (base 1 would just add 1).
  // NOTE(review): the sum may be rounded to the default MathContext for large `size` - confirm
  // that the resulting values are what the benchmark should measure.
  smallBigDecimals = (1 to size).map(BigDecimal(_) + BigDecimal(10).pow(-size)).toVector
}
}

Expand All @@ -75,6 +85,14 @@ class TopCMSBenchmark {
def sumLargeBigIntTopCms(st: CMSState) =
sumTopCmsVector(st.largeBigInts, st.cmsBigIntMonoid)

/** Benchmark: passes the small BigDecimal inputs and the BigDecimal monoid to `sumTopCmsVector`. */
@Benchmark
def sumSmallBigDecimalTopCms(st: CMSState) = {
  val inputs = st.smallBigDecimals
  val monoid = st.cmsBigDecimalMonoid
  sumTopCmsVector(inputs, monoid)
}

/** Benchmark: passes the large BigDecimal inputs and the BigDecimal monoid to `sumTopCmsVector`. */
@Benchmark
def sumLargeBigDecimalTopCms(st: CMSState) = {
  val inputs = st.largeBigDecimals
  val monoid = st.cmsBigDecimalMonoid
  sumTopCmsVector(inputs, monoid)
}

/** Benchmark: passes the large String inputs and the String monoid to `sumTopCmsVector`. */
@Benchmark
def sumLargeStringTopCms(st: CMSState) = {
  val inputs = st.largeStrings
  val monoid = st.cmsStringMonoid
  sumTopCmsVector(inputs, monoid)
}
Expand Down
16 changes: 16 additions & 0 deletions algebird-core/src/main/scala/com/twitter/algebird/CMSHasher.scala
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,20 @@ object CMSHasher {
override def hash(a: Int, b: Int, width: Int)(x: Array[Byte]): Int = hashBytes(a, b, width)(x)
}

// Note: CMSHasher[BigInt] not provided here but in CMSHasherImplicits for legacy support reasons. New hashers
// should come here.

/**
 * [[CMSHasher]] for `BigDecimal` keys.
 *
 * The key is reduced to a canonical (unscaled value, scale) pair before hashing, so that
 * numerically equal keys with different representations (for example `1.0` and `1.00`) are
 * hashed to the same slot. Without that step, keys that compare equal could be counted in
 * different buckets.
 *
 * Only `a` is used, as the Murmur seed; `b` is not consulted by this hasher.
 */
implicit object CMSHasherBigDecimal extends CMSHasher[BigDecimal] {

  /**
   * @param a     seed for both Murmur hashing steps
   * @param b     unused by this implementation
   * @param width number of slots; the result lies in `[0, width)`
   * @param x     the key to hash
   * @return the slot index for `x`
   */
  override def hash(a: Int, b: Int, width: Int)(x: BigDecimal): Int = {
    // Zero is special-cased because `stripTrailingZeros` did not normalise zero values
    // (e.g. 0.000) on JDKs older than 8.
    val canonical: java.math.BigDecimal =
      if (x.signum == 0) java.math.BigDecimal.ZERO
      else x.underlying.stripTrailingZeros

    val uh = scala.util.hashing.MurmurHash3.arrayHash(canonical.unscaledValue.toByteArray, a)
    val hash = scala.util.hashing.MurmurHash3.productHash((uh, canonical.scale), a)

    // We only want positive integers for the subsequent modulo. This method mimics Java's Hashtable
    // implementation. The Java code uses `0x7FFFFFFF` for the bit-wise AND, which is equal to Int.MaxValue.
    val positiveHash = hash & Int.MaxValue
    positiveHash % width
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ object Group extends GeneratedGroupImplicits with ProductGroups {
// Implicit Group instances. Each one is the shared ...Ring / ...Field object for the element
// type, exposed at the Group[T] type.
implicit val jshortGroup: Group[JShort] = JShortRing
implicit val longGroup: Group[Long] = LongRing
implicit val bigIntGroup: Group[BigInt] = BigIntRing
implicit val bigDecimalGroup: Group[BigDecimal] = BigDecimalRing
implicit val jlongGroup: Group[JLong] = JLongRing
implicit val floatGroup: Group[Float] = FloatField
implicit val jfloatGroup: Group[JFloat] = JFloatField
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ object Monoid extends GeneratedMonoidImplicits with ProductMonoids {
// Implicit Monoid instances. Each one is the shared ...Ring / ...Field object for the element
// type, exposed at the Monoid[T] type.
implicit val shortMonoid: Monoid[Short] = ShortRing
implicit val jshortMonoid: Monoid[JShort] = JShortRing
implicit val bigIntMonoid: Monoid[BigInt] = BigIntRing
implicit val bigDecimalMonoid: Monoid[BigDecimal] = BigDecimalRing
implicit val longMonoid: Monoid[Long] = LongRing
implicit val jlongMonoid: Monoid[JLong] = JLongRing
implicit val floatMonoid: Monoid[Float] = FloatField
Expand Down
2 changes: 2 additions & 0 deletions algebird-core/src/main/scala/com/twitter/algebird/Ring.scala
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ object LongRing extends Ring[Long] {
}

/** Ring instance for `BigInt`, defined through [[NumericRing]]. */
object BigIntRing extends NumericRing[BigInt]
/**
 * Ring instance for `BigDecimal`, defined through [[NumericRing]] in the same way as [[BigIntRing]].
 *
 * NOTE(review): presumably the operations come from the implicit `Numeric[BigDecimal]` - confirm
 * against the definition of NumericRing.
 */
object BigDecimalRing extends NumericRing[BigDecimal]

object Ring extends GeneratedRingImplicits with ProductRings {
// This pattern is really useful for typeclasses
Expand All @@ -139,6 +140,7 @@ object Ring extends GeneratedRingImplicits with ProductRings {
// Implicit Ring instances. Each one is the shared ...Ring / ...Field object for the element
// type, exposed at the Ring[T] type.
implicit val jshortRing: Ring[JShort] = JShortRing
implicit val longRing: Ring[Long] = LongRing
implicit val bigIntRing: Ring[BigInt] = BigIntRing
implicit val bigDecimalRing: Ring[BigDecimal] = BigDecimalRing
implicit val jlongRing: Ring[JLong] = JLongRing
implicit val floatRing: Ring[Float] = FloatField
implicit val jfloatRing: Ring[JFloat] = JFloatField
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ object Semigroup extends GeneratedSemigroupImplicits with ProductSemigroups {
// Implicit Semigroup instances. Each one is the shared ...Ring / ...Field object for the element
// type, exposed at the Semigroup[T] type.
implicit val jshortSemigroup: Semigroup[JShort] = JShortRing
implicit val longSemigroup: Semigroup[Long] = LongRing
implicit val bigIntSemigroup: Semigroup[BigInt] = BigIntRing
implicit val bigDecimalSemigroup: Semigroup[BigDecimal] = BigDecimalRing
implicit val jlongSemigroup: Semigroup[JLong] = JLongRing
implicit val floatSemigroup: Semigroup[Float] = FloatField
implicit val jfloatSemigroup: Semigroup[JFloat] = JFloatField
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,24 @@ limitations under the License.

package com.twitter.algebird

import org.scalacheck.Arbitrary
import org.scalacheck.Prop
import org.scalacheck.{Arbitrary, Gen, Prop}
import org.scalacheck.Prop.forAll

import scala.math.Equiv

/**
* Base properties useful for all tests using Algebird's typeclasses.
*/

object BaseProperties {
/**
 * Arbitrary for BigDecimals of the form `mantissa * 10^exponent`, where the mantissa is an
 * arbitrary BigInt and the exponent is drawn from [-128, 128].
 *
 * It is not implicit; a test that wants it has to bring it into implicit scope itself.
 */
val arbReasonableBigDecimals: Arbitrary[BigDecimal] = Arbitrary {
  Gen.choose(-128, 128).flatMap { exponent =>
    implicitly[Arbitrary[BigInt]].arbitrary.map { mantissa =>
      BigDecimal(mantissa) * BigDecimal(10).pow(exponent)
    }
  }
}

/**
 * Default equality used by the law checks: plain `==` on the two values.
 *
 * @param t0 left-hand value
 * @param t1 right-hand value
 * @return `true` when `t0 == t1`
 */
def defaultEq[T](t0: T, t1: T): Boolean = t0 == t1

trait HigherEq[M[_]] {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import Helpers.arbitraryBatched
class BatchedLaws extends CheckProperties with Matchers with PropertyChecks {

import BaseProperties._
// Brings the restricted BigDecimal generator from BaseProperties into implicit scope for this
// suite. The explicit type keeps implicit resolution independent of type inference.
implicit val arbitraryBigDecimalsHere: Arbitrary[BigDecimal] = BaseProperties.arbReasonableBigDecimals

def testBatchedMonoid[A: Arbitrary: Monoid](name: String, size: Int): Unit = {
implicit val m: Monoid[Batched[A]] = Batched.compactingMonoid[A](size)
Expand All @@ -43,6 +44,10 @@ class BatchedLaws extends CheckProperties with Matchers with PropertyChecks {
testBatchedMonoid[BigInt]("BigInt", 10)
testBatchedMonoid[BigInt]("BigInt", 100)
testBatchedMonoid[BigInt]("BigInt", 1000000)
testBatchedMonoid[BigDecimal]("BigDecimal", 1)
testBatchedMonoid[BigDecimal]("BigDecimal", 10)
testBatchedMonoid[BigDecimal]("BigDecimal", 100)
testBatchedMonoid[BigDecimal]("BigDecimal", 1000000)
testBatchedMonoid[String]("String", 1)
testBatchedMonoid[String]("String", 10)
testBatchedMonoid[String]("String", 100)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class CmsLaws extends PropSpec with PropertyChecks with Matchers {
check(monoidLawsEquiv[CMS[BigInt]])
}

// Checks the monoid laws (via monoidLawsEquiv) for CMS[BigDecimal].
// The two implicit vals supply the monoid under test and an Arbitrary built from that monoid.
property("CountMinSketch[BigDecimal] is a Monoid") {
implicit val cmsMonoid = CMS.monoid[BigDecimal](EPS, DELTA, SEED)
implicit val cmsGen = createArbitrary[BigDecimal](cmsMonoid)
check(monoidLawsEquiv[CMS[BigDecimal]])
}

property("CountMinSketch[String] is a Monoid") {
implicit val cmsMonoid = CMS.monoid[String](EPS, DELTA, SEED)
implicit val cmsGen = createArbitrary[String](cmsMonoid)
Expand Down Expand Up @@ -111,6 +117,12 @@ class TopPctCmsLaws extends PropSpec with PropertyChecks with Matchers {
monoidLaws[TopCMS[BigInt]]
}

// Builds the monoid-law property for TopCMS[BigDecimal].
// The two implicit vals supply the monoid under test and an Arbitrary built from that monoid.
// NOTE(review): unlike the CountMinSketch[BigDecimal] property in CmsLaws, the result of
// monoidLaws is not passed to check(...). If it returns a ScalaCheck Prop, that Prop is
// discarded without being evaluated - confirm that the laws are actually exercised here.
property("TopPctCms[BigDecimal] is a Monoid") {
implicit val cmsMonoid = TopPctCMS.monoid[BigDecimal](EPS, DELTA, SEED, HEAVY_HITTERS_PCT)
implicit val cmsGen = createArbitrary[BigDecimal](cmsMonoid)
monoidLaws[TopCMS[BigDecimal]]
}

property("TopPctCms[String] is a Monoid") {
implicit val cmsMonoid = TopPctCMS.monoid[String](EPS, DELTA, SEED, HEAVY_HITTERS_PCT)
implicit val cmsGen = createArbitrary[String](cmsMonoid)
Expand Down Expand Up @@ -273,6 +285,7 @@ class CMSShortTest extends CMSTest[Short]
// Concrete instantiations of the generic CMSTest, one per supported key type.
class CMSIntTest extends CMSTest[Int]
class CMSLongTest extends CMSTest[Long]
class CMSBigIntTest extends CMSTest[BigInt]
class CMSBigDecimalTest extends CMSTest[BigDecimal]
class CMSStringTest extends CMSTest[String]
class CMSBytesTest extends CMSTest[Bytes]

Expand Down Expand Up @@ -964,6 +977,7 @@ class CMSHasherShortSpec extends CMSHasherSpec[Short]
// Concrete instantiations of the generic CMSHasherSpec, one per supported key type.
class CMSHasherIntSpec extends CMSHasherSpec[Int]
class CMSHasherLongSpec extends CMSHasherSpec[Long]
class CMSHasherBigIntSpec extends CMSHasherSpec[BigInt]
class CMSHasherBigDecimalSpec extends CMSHasherSpec[BigDecimal]
class CMSHasherStringSpec extends CMSHasherSpec[String]
class CMSHasherBytesSpec extends CMSHasherSpec[Bytes]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ object FromIntLike {
override def fromInt(x: Int): BigInt = BigInt(x)
}

/** [[FromIntLike]] instance that turns an `Int` into the `BigDecimal` of the same value. */
implicit object FromIntBigDecimal extends FromIntLike[BigDecimal] {
  override def fromInt(x: Int): BigDecimal = {
    val asDecimal = BigDecimal.apply(x)
    asDecimal
  }
}

implicit object FromIntString extends FromIntLike[String] {
override def fromInt(x: Int): String = x.toString
}
Expand Down

0 comments on commit d83922c

Please sign in to comment.