Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Predecessible and methods to Interval #262

Merged
merged 6 commits into from
Feb 25, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 87 additions & 3 deletions algebird-core/src/main/scala/com/twitter/algebird/Interval.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package com.twitter.algebird
// TODO this is clearly more general than summingbird, and should be extended to be a ring (add union, etc...)

/** Represents a single interval on a T with an Ordering
* TODO remove T => Boolean. it ruins toString and doesn't help anything
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have an issue number we should be referencing here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added those for #263

* https://github.com/twitter/algebird/issues/261
*/
sealed trait Interval[T] extends (T => Boolean) with java.io.Serializable {
def contains(t: T): Boolean
Expand Down Expand Up @@ -57,8 +59,38 @@ object Interval extends java.io.Serializable {
}

// Marker traits to keep lower on the left in Intersection
sealed trait Lower[T] extends Interval[T]
sealed trait Upper[T] extends Interval[T]
/** An interval that is bounded from below. */
sealed trait Lower[T] extends Interval[T] {
  /**
   * The smallest value contained in this interval, if there is one.
   * The result is optional because pathological bounds such as
   * ExclusiveLower(Int.MaxValue) contain nothing and are equivalent to Empty.
   */
  def least(implicit s: Successible[T]): Option[T]

  /**
   * Enumerates the items of this Lower[T] in ascending order, starting from
   * least. Yields nothing when there is no least element.
   */
  def toIterable(implicit s: Successible[T]): Iterable[T] =
    least.map(s.iterateNext(_)).getOrElse(Iterable.empty[T])
}
/** An interval that is bounded from above. */
sealed trait Upper[T] extends Interval[T] {
  /**
   * The largest value that is contained here.
   * This is an Option, because of cases like ExclusiveUpper(Int.MinValue),
   * which are pathological and equivalent to Empty
   */
  def greatest(implicit p: Predecessible[T]): Option[T]

  /**
   * The smallest value that is not present, i.e. the bound expressed as an
   * exclusive upper limit. For an inclusive bound this is the successor of the
   * bound, so it is None when the Successible has no next value.
   */
  def strictUpperBound(implicit s: Successible[T]): Option[T]

  /**
   * Iterates all the items in this Upper[T] from highest to lowest.
   * Yields nothing when there is no greatest element.
   */
  def toIterable(implicit p: Predecessible[T]): Iterable[T] =
    greatest match {
      case Some(g) => p.iteratePrev(g)
      case None => Iterable.empty
    }
}

case class InclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Lower[T] {
def contains(t: T): Boolean = ordering.lteq(lower, t)
Expand All @@ -73,6 +105,7 @@ case class InclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) exten
case lb@ExclusiveLower(thatlb) => if (lb.ordering.gt(lower, thatlb)) this else that
case Intersection(thatL, thatU) => (this && thatL) && thatU
}
def least(implicit s: Successible[T]): Option[T] = Some(lower)
def mapNonDecreasing[U:Ordering](fn: T => U): Interval[U] = InclusiveLower(fn(lower))
}
case class ExclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Lower[T] {
Expand All @@ -88,10 +121,14 @@ case class ExclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) exten
case lb@ExclusiveLower(thatlb) => if (lb.ordering.gteq(lower, thatlb)) this else that
case Intersection(thatL, thatU) => (this && thatL) && thatU
}
def least(implicit s: Successible[T]): Option[T] = s.next(lower)
def mapNonDecreasing[U:Ordering](fn: T => U): Interval[U] = ExclusiveLower(fn(lower))
}
case class InclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Upper[T] {
def contains(t: T): Boolean = ordering.lteq(t, upper)
def greatest(implicit p: Predecessible[T]): Option[T] = Some(upper)
// The smallest value that is not present
def strictUpperBound(implicit s: Successible[T]): Option[T] = s.next(upper)
def intersect(that: Interval[T]): Interval[T] = that match {
case Universe() => this
case Empty() => that
Expand All @@ -109,6 +146,9 @@ case class InclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) exten
}
case class ExclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Upper[T] {
def contains(t: T): Boolean = ordering.lt(t, upper)
def greatest(implicit p: Predecessible[T]): Option[T] = p.prev(upper)
// The smallest value that is not present
def strictUpperBound(implicit s: Successible[T]): Option[T] = Some(upper)
def intersect(that: Interval[T]): Interval[T] = that match {
case Universe() => this
case Empty() => that
Expand All @@ -128,9 +168,53 @@ case class ExclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) exten
case class Intersection[T](lower: Lower[T], upper: Upper[T]) extends Interval[T] {
def contains(t: T): Boolean = lower.contains(t) && upper.contains(t)
def intersect(that: Interval[T]): Interval[T] = that match {
case Universe() => this
case Empty() => that
case lb@InclusiveLower(_) => (lb && lower) && upper
case lb@ExclusiveLower(_) => (lb && lower) && upper
case ub@InclusiveUpper(_) => lower && (ub && upper)
case ub@ExclusiveUpper(_) => lower && (ub && upper)
case Intersection(thatL, thatU) => (lower && thatL) && (upper && thatU)
case _ => (lower && that) && (upper && that)
}
def mapNonDecreasing[U:Ordering](fn: T => U): Interval[U] =
lower.mapNonDecreasing(fn) && upper.mapNonDecreasing(fn)

/**
 * Enumerates, in ascending order, every item contained in this Intersection.
 * It walks upwards from the least element of the lower bound and stops at the
 * first item the upper bound does not contain.
 */
def leastToGreatest(implicit s: Successible[T]): Iterable[T] = {
  val lowerBound = lower
  val upperBound = upper
  // TODO https://github.com/twitter/algebird/issues/263
  new AbstractIterable[T] {
    // takeWhile is applied to the Iterator because the normal (Iterable)
    // takeWhile causes OOM on big intervals
    def iterator: Iterator[T] =
      lowerBound.toIterable.iterator.takeWhile { item => upperBound.contains(item) }
  }
}
/**
 * Enumerates, in descending order, every item contained in this Intersection.
 * It walks downwards from the greatest element of the upper bound and stops at
 * the first item the lower bound does not contain.
 */
def greatestToLeast(implicit p: Predecessible[T]): Iterable[T] = {
  val lowerBound = lower
  val upperBound = upper
  // TODO https://github.com/twitter/algebird/issues/263
  new AbstractIterable[T] {
    // takeWhile is applied to the Iterator because the normal (Iterable)
    // takeWhile causes OOM on big intervals
    def iterator: Iterator[T] =
      upperBound.toIterable.iterator.takeWhile { item => lowerBound.contains(item) }
  }
}

/**
* Some intervals can actually be synonyms for empty:
* (0,0) for instance, contains nothing. This cannot be normalized to
* [a, b) form, thus we return an option
* Also, there are cases like [Int.MinValue, Int.MaxValue] that cannot be
* expressed this way because they are actually equivalent to Universe.
* The bottom line: if this returns None, it just means you can't express
* it this way, it does not mean it is empty or universe, etc... (there
* are other cases).
*/
def toLeftClosedRightOpen(implicit s: Successible[T]): Option[(T, T)] =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you imagine this being used? This seems like a terribly specific method. I guess I don't want to have 4 methods for all of the kinds, and would rather think of a more general way to let people transform intervals into the forms that they want.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, we actually have a lot of interval parsing code in summingbird that is a pain given how general Intervals can be. This was for the common cases of finite interval that can be expressed as >= low, < high, which has the benefit of being easy to split: given low < h1 < high, then you can break this into two adjacent intervals: (low, h1), (h1, high).

Especially when converting to existing code that uses ranges like this, this method can be useful.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issues I've seen are around super-intervals (if that's the right word), e.g., batches being a superset of timestamps. Would it make sense to optimize around this?

For example, something like:

def toIntervalOf[U](implicit mapping: Injection[T, Interval[U]]): Interval[U]

Maybe it should be abstracted a different way though, to more simply handle cases like going from timestamp back to batch.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an interesting case. So far, we have this mapNonDecreasing on intervals, but your point is nice: often a value in one case is a interval in the other. Such as: Injection[BatchID, Interval[Timestamp]].

If [B1, B2), that means that you have [least(time(B1)), least(time(B2)))

Similarly:

[B1, B2] => [least(time(B1)), greatest(time(B2))]
(B1, B2) => [greatest(time(B1)), least(time(B2)))
etc…

I think this is a useful pattern. Consider the truncation of Injection[Int, Interval[Double]] 1 => [1.0, 2.0) for instance. Though, really we might want to loosen it to Injection[Interval[Int], Interval[Double]], this is what it sounds like you were saying with supersets: If an interval is a super-interval of another, it means there is an Injection[Interval[T], Interval[U]] right?

I'd rather follow this up with more code moved from summingbird into this interval, successible, set of stuff. Can we do this in a next PR?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps the proper abstraction is to have a "smallest containing interval" and "largest contained interval", if we want to also cleanly handle cases where interval edges don't necessarily line up, such as when going from Timestamp to BatchID.

Anyway, separate PRs is better.

for {
l <- lower.least
g <- upper.strictUpperBound
} yield (l, g)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
Copyright 2014 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.algebird

/**
 * Typeclass for things that can be counted downwards. It is important that
 * prev(t) is always less than t. prev returns an Option because some items
 * may reach a minimum key, in which case the result is None.
 */
trait Predecessible[T] extends java.io.Serializable {
  /** The value immediately before old, or None when there is none. */
  def prev(old: T): Option[T]

  /** Lifted form of prev: None stays None. */
  def prev(old: Option[T]): Option[T] = old match {
    case Some(t) => prev(t)
    case None => None
  }

  /** Lazily walks downwards: old, prev(old), ... until prev yields None. */
  def iteratePrev(old: T): Iterable[T] = {
    val outer = this
    // TODO in scala 2.11, there is an AbstractIterable which should be used here
    // to reduce generated class size due to all the methods in Iterable.
    // https://github.com/twitter/algebird/issues/263
    new AbstractIterable[T] {
      def iterator: Iterator[T] = {
        val start: Option[T] = Some(old)
        Iterator.iterate(start) { current => outer.prev(current) }
          .takeWhile(_.isDefined)
          .collect { case Some(item) => item }
      }
    }
  }

  def ordering: Ordering[T]
}

/** Summoner-style helpers and the default instance for Predecessible. */
object Predecessible extends java.io.Serializable {
  /** Delegates to the implicit instance; enables: Predecessible.prev(2) == Some(1) */
  def prev[T](t: T)(implicit p: Predecessible[T]): Option[T] =
    p.prev(t)

  /** Option-lifted variant, delegating to the instance's own Option overload. */
  def prev[T](t: Option[T])(implicit p: Predecessible[T]): Option[T] =
    p.prev(t)

  /** Iterates downwards from first using the implicit instance. */
  def iteratePrev[T](first: T)(implicit p: Predecessible[T]): Iterable[T] =
    p.iteratePrev(first)

  /** Every Integral type gets a Predecessible that subtracts one. */
  implicit def integralPrev[N: Integral]: Predecessible[N] =
    new IntegralPredecessible[N]
}

/** Predecessible for any Integral type: the predecessor is the value minus one. */
class IntegralPredecessible[T:Integral] extends Predecessible[T] {
  def prev(old: T) = {
    val integral = implicitly[Integral[T]]
    val candidate = integral.minus(old, integral.one)
    // A genuine predecessor is strictly smaller than old; anything else means
    // the subtraction wrapped around, so there is no predecessor.
    if (ordering.compare(candidate, old) < 0) Some(candidate) else None
  }

  def ordering: Ordering[T] = implicitly[Integral[T]]
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,27 @@ package com.twitter.algebird
trait Successible[@specialized(Int,Long,Float,Double) T] {
def next(old: T): Option[T]
def next(old: Option[T]): Option[T] = old flatMap next
def iterateNext(old: T): Iterable[T] = {
val self = this
// TODO in scala 2.11, there is an AbstractIterable which should be used here
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to earlier stuff, should we have an issue or some identifier so these can be grepped for later?

// to reduce generated class size due to all the methods in Iterable.
// https://github.com/twitter/algebird/issues/263
new AbstractIterable[T] {
def iterator =
Iterator.iterate[Option[T]](Some(old)) { self.next(_) }
.takeWhile(_.isDefined)
.map(_.get)
}
}
def ordering: Ordering[T]
}

object Successible {
// enables: Successible.next(2) == Some(3)
def next[T](t: T)(implicit succ: Successible[T]): Option[T] = succ.next(t)
def next[T](t: Option[T])(implicit succ: Successible[T]): Option[T] = succ.next(t)
def iterateNext[T](old: T)(implicit succ: Successible[T]): Iterable[T] =
succ.iterateNext(old)

implicit def numSucc[N: Numeric]: Successible[N] = new NumericSuccessible[N]

Expand All @@ -52,6 +66,7 @@ object Successible {
}
}

// TODO Remove Ordering. It is unused. Note Numeric and Integral extend ordering
class NumericSuccessible[@specialized(Int,Long,Float,Double) T:Numeric:Ordering] extends Successible[T] {
def next(old: T) = {
val numeric = implicitly[Numeric[T]]
Expand All @@ -63,5 +78,5 @@ class NumericSuccessible[@specialized(Int,Long,Float,Double) T:Numeric:Ordering]
}
}

def ordering = implicitly[Ordering[T]]
def ordering: Ordering[T] = implicitly[Numeric[T]]
}
26 changes: 26 additions & 0 deletions algebird-core/src/main/scala/com/twitter/algebird/package.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
Copyright 2014 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.twitter

package object algebird {
  /**
   * Package-private abstract base for Iterable implementations, here to avoid
   * massive bloat around these classes.
   * TODO remove in scala 2.11 and use the standard one there.
   * https://github.com/twitter/algebird/issues/263
   */
  private[algebird] abstract class AbstractIterable[T] extends Iterable[T]

  /**
   * Package-private abstract base for Iterator implementations, here to avoid
   * massive bloat around these classes.
   * TODO remove in scala 2.11 and use the standard one there.
   * https://github.com/twitter/algebird/issues/263
   */
  private[algebird] abstract class AbstractIterator[T] extends Iterator[T]
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,16 @@ import org.scalacheck.Gen._
object Generators {

implicit def intervalArb[T:Arbitrary:Ordering]: Arbitrary[Interval[T]] =
Arbitrary(oneOf(genUniverse, genEmpty, genInclusiveLower, genExclusiveLower, genInclusiveUpper, genExclusiveUpper))
Arbitrary(oneOf(genUniverse, genEmpty, genInclusiveLower, genExclusiveLower, genInclusiveUpper, genExclusiveUpper, genIntersection))

/** Arbitrary for lower-bounded intervals: picks between the inclusive and the exclusive lower-bound generator. */
implicit def lowerIntArb[T:Arbitrary:Ordering]: Arbitrary[Lower[T]] =
Arbitrary(oneOf(genInclusiveLower, genExclusiveLower))

/** Arbitrary for upper-bounded intervals: picks between the inclusive and the exclusive upper-bound generator. */
implicit def upperIntArb[T:Arbitrary:Ordering]: Arbitrary[Upper[T]] =
Arbitrary(oneOf(genInclusiveUpper, genExclusiveUpper))

/** Arbitrary for Intersection values, backed by genIntersection. */
implicit def intersectionArb[T:Arbitrary:Ordering]: Arbitrary[Intersection[T]] =
Arbitrary(genIntersection)

def genUniverse[T:Arbitrary:Ordering] =
for {
Expand Down Expand Up @@ -58,4 +67,10 @@ object Generators {
for {
u <- Arbitrary.arbitrary[T]
} yield ExclusiveUpper(u)

/**
 * Generates an Intersection from an arbitrary lower and upper bound, discarding
 * pairs whose intersection (lower && upper) equals Empty.
 */
def genIntersection[T:Arbitrary:Ordering] =
  for {
    lowerBound <- Arbitrary.arbitrary[Lower[T]]
    upperBound <- Arbitrary.arbitrary[Upper[T]]
    if (lowerBound && upperBound) != Empty[T]()
  } yield Intersection(lowerBound, upperBound)
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ object IntervalLaws extends Properties("Interval") {
import Generators._

property("[x, x + 1) contains x") =
forAll { y: Int =>
forAll { y: Int =>
val x = y.asInstanceOf[Long]
Interval.leftClosedRightOpen(x, x + 1).contains(x)
Interval.leftClosedRightOpen(x, x + 1).contains(x)
}

property("(x, x + 1] contains x + 1") =
forAll { y: Int =>
forAll { y: Int =>
val x = y.asInstanceOf[Long]
Interval.leftOpenRightClosed(x, x + 1).contains(x + 1)
Interval.leftOpenRightClosed(x, x + 1).contains(x + 1)
}

property("[x, x + 1) does not contain x + 1") =
Expand All @@ -47,4 +47,58 @@ object IntervalLaws extends Properties("Interval") {
forAll { (item: Long, i1: Interval[Long], i2: Interval[Long]) =>
(i1 && i2).contains(item) == (i1(item) && i2(item))
}

// Whenever an Intersection can be rewritten as [low, high), the rewritten
// interval must agree with the original on every probed value.
property("toLeftClosedRightOpen is an Injection") =
  forAll { (intr: Intersection[Long], tests: List[Long]) =>
    intr.toLeftClosedRightOpen match {
      case Some((low, high)) =>
        val rebuilt = Interval.leftClosedRightOpen(low, high)
        tests.forall { t => intr(t) == rebuilt(t) }
      // none means this can't be expressed as this kind of interval
      case None => true
    }
  }
// least must be contained in the interval while its predecessor must not be.
// When either value does not exist, the bound has to be one of the two edge
// cases handled in the fallback.
property("least is the smallest") =
  forAll { (lower: Lower[Long]) =>
    (for {
      le <- lower.least
      ple <- Predecessible.prev(le)
    } yield (lower.contains(le) && !lower.contains(ple)))
      .getOrElse {
        lower match {
          // least is the bound itself, so only prev can be missing: the bound is Long.MinValue
          case InclusiveLower(l) => l == Long.MinValue
          // prev(least) is the bound itself, so only least can be missing:
          // the pathological bound that excludes Long.MaxValue and contains nothing
          case ExclusiveLower(l) => l == Long.MaxValue
        }
      }
  }

// greatest must be contained in the interval while its successor must not be.
// When either value does not exist, the bound has to be one of the two edge
// cases handled in the fallback.
property("greatest is the biggest") =
  forAll { (upper: Upper[Long]) =>
    (for {
      gr <- upper.greatest
      ngr <- Successible.next(gr)
    } yield (upper.contains(gr) && !upper.contains(ngr)))
      .getOrElse {
        upper match {
          // greatest is the bound itself, so only next can be missing: the bound is Long.MaxValue
          case InclusiveUpper(u) => u == Long.MaxValue
          // next(greatest) is the bound itself, so only greatest can be missing:
          // the pathological bound that excludes Long.MinValue and contains nothing
          case ExclusiveUpper(u) => u == Long.MinValue
        }
      }
  }

// Both traversals must step by exactly one between consecutive items.
// The two directions are checked independently and then combined: the earlier
// `a || b && c` form parsed as `a || (b && c)` and therefore skipped the
// greatestToLeast check whenever leastToGreatest had fewer than two items.
property("leastToGreatest and greatestToLeast are ordered and adjacent") =
  forAll { (intr: Intersection[Long]) =>
    // true when there are fewer than two items or every consecutive pair satisfies step
    def adjacent(items: Iterable[Long])(step: (Long, Long) => Boolean): Boolean =
      (items.size < 2) || items.sliding(2).forall { pair =>
        pair.toList match {
          case first :: second :: Nil => step(first, second)
          case _ => false
        }
      }

    // only the first 100 items are inspected so that huge intervals stay cheap
    val ascendingOk =
      adjacent(intr.leastToGreatest.take(100)) { (low, high) => low + 1L == high }
    val descendingOk =
      adjacent(intr.greatestToLeast.take(100)) { (high, low) => low + 1L == high }

    ascendingOk && descendingOk
  }
}