-
Notifications
You must be signed in to change notification settings - Fork 347
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Predecessible and methods to Interval #262
Changes from all commits
478b477
8d17fe6
1d90ede
5bf3bdf
a17c309
4daef99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,8 @@ package com.twitter.algebird | |
// TODO this is clearly more general than summingbird, and should be extended to be a ring (add union, etc...) | ||
|
||
/** Represents a single interval on a T with an Ordering | ||
* TODO remove T => Boolean. it ruins toString and doesn't help anything | ||
* https://github.com/twitter/algebird/issues/261 | ||
*/ | ||
sealed trait Interval[T] extends (T => Boolean) with java.io.Serializable { | ||
def contains(t: T): Boolean | ||
|
@@ -57,8 +59,38 @@ object Interval extends java.io.Serializable { | |
} | ||
|
||
// Marker traits to keep lower on the left in Intersection | ||
sealed trait Lower[T] extends Interval[T] | ||
sealed trait Upper[T] extends Interval[T] | ||
sealed trait Lower[T] extends Interval[T] { | ||
/** | ||
* The smallest value that is contained here, if any. | ||
* This is an Option, because of cases like ExclusiveLower(Int.MaxValue) | ||
* which are pathological and equivalent to Empty | ||
*/ | ||
def least(implicit s: Successible[T]): Option[T] | ||
/** Iterates all the items in this Lower[T] from lowest to highest: | ||
* starts at least and follows s.iterateNext; empty when least is None. | ||
* Nothing here applies an upper limit, so the walk only ends when s.next returns None. | ||
*/ | ||
def toIterable(implicit s: Successible[T]): Iterable[T] = | ||
least match { | ||
case Some(l) => s.iterateNext(l) | ||
case None => Iterable.empty | ||
} | ||
} | ||
sealed trait Upper[T] extends Interval[T] { | ||
/** | ||
* The greatest value that is contained here, if any. | ||
* This is an Option, because of cases like ExclusiveUpper(Int.MinValue), | ||
* which are pathological and equivalent to Empty | ||
*/ | ||
def greatest(implicit p: Predecessible[T]): Option[T] | ||
// The smallest value that is not present; None when no such value exists | ||
// (e.g. InclusiveUpper(upper) returns s.next(upper), which may be None) | ||
def strictUpperBound(implicit s: Successible[T]): Option[T] | ||
/** Iterates all the items in this Upper[T] from highest to lowest: | ||
* starts at greatest and follows p.iteratePrev; empty when greatest is None. | ||
*/ | ||
def toIterable(implicit p: Predecessible[T]): Iterable[T] = | ||
greatest match { | ||
case Some(g) => p.iteratePrev(g) | ||
case None => Iterable.empty | ||
} | ||
} | ||
|
||
case class InclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Lower[T] { | ||
def contains(t: T): Boolean = ordering.lteq(lower, t) | ||
|
@@ -73,6 +105,7 @@ case class InclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) exten | |
case lb@ExclusiveLower(thatlb) => if (lb.ordering.gt(lower, thatlb)) this else that | ||
case Intersection(thatL, thatU) => (this && thatL) && thatU | ||
} | ||
def least(implicit s: Successible[T]): Option[T] = Some(lower) | ||
def mapNonDecreasing[U:Ordering](fn: T => U): Interval[U] = InclusiveLower(fn(lower)) | ||
} | ||
case class ExclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Lower[T] { | ||
|
@@ -88,10 +121,14 @@ case class ExclusiveLower[T](lower: T)(implicit val ordering: Ordering[T]) exten | |
case lb@ExclusiveLower(thatlb) => if (lb.ordering.gteq(lower, thatlb)) this else that | ||
case Intersection(thatL, thatU) => (this && thatL) && thatU | ||
} | ||
def least(implicit s: Successible[T]): Option[T] = s.next(lower) | ||
def mapNonDecreasing[U:Ordering](fn: T => U): Interval[U] = ExclusiveLower(fn(lower)) | ||
} | ||
case class InclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Upper[T] { | ||
def contains(t: T): Boolean = ordering.lteq(t, upper) | ||
def greatest(implicit p: Predecessible[T]): Option[T] = Some(upper) | ||
// The smallest value that is not present | ||
def strictUpperBound(implicit s: Successible[T]): Option[T] = s.next(upper) | ||
def intersect(that: Interval[T]): Interval[T] = that match { | ||
case Universe() => this | ||
case Empty() => that | ||
|
@@ -109,6 +146,9 @@ case class InclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) exten | |
} | ||
case class ExclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) extends Interval[T] with Upper[T] { | ||
def contains(t: T): Boolean = ordering.lt(t, upper) | ||
def greatest(implicit p: Predecessible[T]): Option[T] = p.prev(upper) | ||
// The smallest value that is not present | ||
def strictUpperBound(implicit s: Successible[T]): Option[T] = Some(upper) | ||
def intersect(that: Interval[T]): Interval[T] = that match { | ||
case Universe() => this | ||
case Empty() => that | ||
|
@@ -128,9 +168,53 @@ case class ExclusiveUpper[T](upper: T)(implicit val ordering: Ordering[T]) exten | |
case class Intersection[T](lower: Lower[T], upper: Upper[T]) extends Interval[T] { | ||
def contains(t: T): Boolean = lower.contains(t) && upper.contains(t) | ||
def intersect(that: Interval[T]): Interval[T] = that match { | ||
case Universe() => this | ||
case Empty() => that | ||
case lb@InclusiveLower(_) => (lb && lower) && upper | ||
case lb@ExclusiveLower(_) => (lb && lower) && upper | ||
case ub@InclusiveUpper(_) => lower && (ub && upper) | ||
case ub@ExclusiveUpper(_) => lower && (ub && upper) | ||
case Intersection(thatL, thatU) => (lower && thatL) && (upper && thatU) | ||
case _ => (lower && that) && (upper && that) | ||
} | ||
def mapNonDecreasing[U:Ordering](fn: T => U): Interval[U] = | ||
lower.mapNonDecreasing(fn) && upper.mapNonDecreasing(fn) | ||
|
||
/** Goes from lowest to highest for all items | ||
* that are contained in this Intersection | ||
*/ | ||
def leastToGreatest(implicit s: Successible[T]): Iterable[T] = { | ||
val self = this | ||
// TODO https://github.com/twitter/algebird/issues/263 | ||
new AbstractIterable[T] { | ||
// we have to do this because the normal takeWhile causes OOM on big intervals | ||
def iterator = lower.toIterable.iterator.takeWhile(self.upper.contains(_)) | ||
} | ||
} | ||
/** Goes from highest to lowest for all items | ||
* that are contained in this Intersection | ||
*/ | ||
def greatestToLeast(implicit p: Predecessible[T]): Iterable[T] = { | ||
val self = this | ||
// TODO https://github.com/twitter/algebird/issues/263 | ||
new AbstractIterable[T] { | ||
// we have to do this because the normal takeWhile causes OOM on big intervals | ||
def iterator = upper.toIterable.iterator.takeWhile(self.lower.contains(_)) | ||
} | ||
} | ||
|
||
/** | ||
* Some intervals can actually be synonyms for empty: | ||
* (0,0) for instance, contains nothing. This cannot be normalized to | ||
* [a, b) form, thus we return an option | ||
* Also, there are cases like [Int.MinValue, Int.MaxValue] that cannot | ||
* be expressed in this form, yet are actually equivalent to Universe. | ||
* The bottom line: if this returns None, it just means you can't express | ||
* it this way, it does not mean it is empty or universe, etc... (there | ||
* are other cases). | ||
*/ | ||
def toLeftClosedRightOpen(implicit s: Successible[T]): Option[(T, T)] = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do you imagine this being used? This seems like a terribly specific method. I guess I don't want to have 4 methods for all of the kinds, and would rather think of a more general way to let people transform intervals into forms that they want. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, we actually have a lot of interval parsing code in summingbird that is a pain given how general Intervals can be. This was for the common case of finite intervals that can be expressed as >= low, < high, which has the benefit of being easy to split: given low < h1 < high, then you can break this into two adjacent intervals: [low, h1), [h1, high). Especially when converting to existing code that uses ranges like this, this method can be useful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issues I've seen are around super-intervals (if that's the right word), e.g., batches being a superset of timestamps. Would it make sense to optimize around this? For example, something like: def toIntervalOf[U](implicit mapping: Injection[T, Interval[U]]): Interval[U] Maybe it should be abstracted a different way though, to more simply handle cases like going from timestamp back to batch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an interesting case. So far, we have this mapNonDecreasing on intervals, but your point is nice: often a value in one case is an interval in the other. Such as: If [B1, B2), that means that you have Similarly:
I think this is a useful pattern. Consider The truncation of I'd rather follow this up with more code moved from summingbird into this interval, successible, set of stuff. Can we do this in a next PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps the proper abstraction is to have a "smallest containing interval" and "largest contained interval", if we want to also cleanly handle cases where interval edges don't necessarily line up, such as when going from Timestamp to BatchID. Anyway, separate PRs is better. |
||
for { | ||
l <- lower.least | ||
g <- upper.strictUpperBound | ||
} yield (l, g) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
Copyright 2014 Twitter, Inc. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
package com.twitter.algebird | ||
|
||
/** | ||
* This is a typeclass to represent things which are countable down. Note that it is important | ||
* that a value prev(t) is always less than t. Note | ||
* that prev returns Option because this class comes with the notion that some items may reach a minimum | ||
* key, for which prev returns None. | ||
*/ | ||
trait Predecessible[T] extends java.io.Serializable { | ||
// The predecessor of old, or None when old has no predecessor | ||
def prev(old: T): Option[T] | ||
// Option-lifted variant: None stays None, Some(t) becomes prev(t) | ||
def prev(old: Option[T]): Option[T] = old.flatMap(prev) | ||
// Iterable over old, prev(old), prev(prev(old)), ... ending at the first None. | ||
// old itself is always the first element. | ||
def iteratePrev(old: T): Iterable[T] = { | ||
// capture the typeclass instance: inside the anonymous class, `this` is the Iterable | ||
val self = this | ||
// TODO in scala 2.11, there is an AbstractIterable which should be used here | ||
// to reduce generated class size due to all the methods in Iterable. | ||
// https://github.com/twitter/algebird/issues/263 | ||
new AbstractIterable[T] { | ||
// Each call builds a fresh iterator, so the chain is recomputed per traversal. | ||
// The .get below is safe: takeWhile only lets defined Options through. | ||
def iterator = | ||
Iterator.iterate[Option[T]](Some(old)) { self.prev(_) } | ||
.takeWhile(_.isDefined) | ||
.map(_.get) | ||
} | ||
} | ||
// NOTE(review): presumably the ordering under which prev(t) is less than t -- confirm | ||
def ordering: Ordering[T] | ||
} | ||
|
||
object Predecessible extends java.io.Serializable { | ||
// enables: Predecessible.prev(2) == Some(1) | ||
def prev[T](t: T)(implicit p: Predecessible[T]): Option[T] = p.prev(t) | ||
// same, for an optional start value: delegates to the Option overload on the instance | ||
def prev[T](t: Option[T])(implicit p: Predecessible[T]): Option[T] = p.prev(t) | ||
|
||
// delegates to p.iteratePrev, so `first` is the first element produced | ||
def iteratePrev[T](first: T)(implicit p: Predecessible[T]): Iterable[T] = | ||
p.iteratePrev(first) | ||
|
||
// implicit instance for any type that has an Integral, backed by IntegralPredecessible | ||
implicit def integralPrev[N: Integral]: Predecessible[N] = new IntegralPredecessible[N] | ||
} | ||
|
||
class IntegralPredecessible[T:Integral] extends Predecessible[T] { | ||
// Predecessor of old is old - 1, or None if the subtraction did not yield a smaller value | ||
def prev(old: T) = { | ||
val numeric = implicitly[Integral[T]] | ||
val newV = numeric.minus(old, numeric.one) | ||
// a result that is not strictly below old means the arithmetic overflowed | ||
if (ordering.compare(newV, old) >= 0) { | ||
// We wrapped around | ||
None | ||
} else { | ||
Some(newV) | ||
} | ||
} | ||
|
||
// the implicit Integral[T] is also an Ordering[T], so it is reused here | ||
def ordering: Ordering[T] = implicitly[Integral[T]] | ||
} | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,13 +27,27 @@ package com.twitter.algebird | |
trait Successible[@specialized(Int,Long,Float,Double) T] { | ||
def next(old: T): Option[T] | ||
def next(old: Option[T]): Option[T] = old flatMap next | ||
def iterateNext(old: T): Iterable[T] = { | ||
val self = this | ||
// TODO in scala 2.11, there is an AbstractIterable which should be used here | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to earlier stuff, should we have an issue or some identifier so these can be grepped for later? |
||
// to reduce generated class size due to all the methods in Iterable. | ||
// https://github.com/twitter/algebird/issues/263 | ||
new AbstractIterable[T] { | ||
def iterator = | ||
Iterator.iterate[Option[T]](Some(old)) { self.next(_) } | ||
.takeWhile(_.isDefined) | ||
.map(_.get) | ||
} | ||
} | ||
def ordering: Ordering[T] | ||
} | ||
|
||
object Successible { | ||
// enables: Successible.next(2) == Some(3) | ||
def next[T](t: T)(implicit succ: Successible[T]): Option[T] = succ.next(t) | ||
def next[T](t: Option[T])(implicit succ: Successible[T]): Option[T] = succ.next(t) | ||
def iterateNext[T](old: T)(implicit succ: Successible[T]): Iterable[T] = | ||
succ.iterateNext(old) | ||
|
||
implicit def numSucc[N: Numeric]: Successible[N] = new NumericSuccessible[N] | ||
|
||
|
@@ -52,6 +66,7 @@ object Successible { | |
} | ||
} | ||
|
||
// TODO Remove Ordering. It is unused. Note Numeric and Integral extend Ordering | ||
class NumericSuccessible[@specialized(Int,Long,Float,Double) T:Numeric:Ordering] extends Successible[T] { | ||
def next(old: T) = { | ||
val numeric = implicitly[Numeric[T]] | ||
|
@@ -63,5 +78,5 @@ class NumericSuccessible[@specialized(Int,Long,Float,Double) T:Numeric:Ordering] | |
} | ||
} | ||
|
||
def ordering = implicitly[Ordering[T]] | ||
def ordering: Ordering[T] = implicitly[Numeric[T]] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/* | ||
Copyright 2014 Twitter, Inc. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package com.twitter | ||
|
||
package object algebird { | ||
/** TODO remove these in scala 2.11 and use the standard there. | ||
* these are here to avoid massive bloat around these classes | ||
* https://github.com/twitter/algebird/issues/263 | ||
* Both are empty abstract subclasses, visible only inside the algebird package; | ||
* e.g. the anonymous Iterables built by iteratePrev / iterateNext extend AbstractIterable. | ||
*/ | ||
private [algebird] abstract class AbstractIterable[T] extends Iterable[T] | ||
private [algebird] abstract class AbstractIterator[T] extends Iterator[T] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have an issue number we should be referencing here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added those for #263