twitter · isnotinvain · Dec 2, 2016 · Dec 1, 2016 · Dec 1, 2016 · Dec 1, 2016
diff --git a/CHANGES.md b/CHANGES.md
@@ -14,6 +14,7 @@
 * Deprecates broken group/ring for `Future`/`Try`: https://github.com/twitter/algebird/pull/584
 * Add `metricsLaws[T]` to `BaseProperties` in `algebird-test`: https://github.com/twitter/algebird/pull/584
 * Modify generated `Tuple2Monoid`, etc to extend `TupleNSemigroup`, giving subclasses access to efficient `sumOption`: https://github.com/twitter/algebird/pull/585
+* Optimize `Generated{Abstract,Product}Algebra.sumOption` with benchmarking: https://github.com/twitter/algebird/pull/591
 
 ### Version 0.12.2 ###
 
@@ -36,7 +37,6 @@
 * Add `Batched[A]` type for efficient lazy addition: https://github.com/twitter/algebird/pull/530
 * Add a default `k` value for `Aggregator.approximatePercentile`: https://github.com/twitter/algebird/pull/531
 
-
 ### Version 0.12.0 ###
 
 * Implement an appendMonoid Aggregator factory which yields aggregators…: https://github.com/twitter/algebird/pull/501

diff --git a/algebird-benchmark/README.md b/algebird-benchmark/README.md
@@ -1,6 +1,8 @@
+# algebird-benchmark
+
 [jmh](http://openjdk.java.net/projects/code-tools/jmh/)-based Benchmarks for Algebird data structures.
 
-# Usage
+## Usage
 
 Run the following commands from the top-level Algebird directory:
 
@@ -10,19 +12,19 @@ Run the following commands from the top-level Algebird directory:
 Now you can run the following commands from within the sbt REPL:
 
     # List available benchmarks
-    > run -l
+    > jmh:run -l
 
     # Run a particular benchmark
-    > run .*HLLBenchmark.*
+    > jmh:run -t1 -f1 -wi 2 -i 3 .*AveragedValueBenchmark.*
 
-    # Run all benchmarks (apparently this is broken, see https://github.com/softprops/cappi/issues/1)
-    > run .*
+    # Run all benchmarks
+    > jmh:run .*
 
-You can find further details in the [sbt-jmh](https://github.com/ktoso/sbt-jmh) documentation, which is the sbt plugin
-we use to run the jmh benchmarks.
+The options in the single-benchmark example tell JMH to run with 1 thread (`-t1`), 1 fork (`-f1`), 2 warmup iterations (`-wi 2`) and 3 measurement iterations (`-i 3`). You can find further details in the [sbt-jmh](https://github.com/ktoso/sbt-jmh) documentation.
 
 Example output for [CMSBenchmark](src/main/scala/com/twitter/algebird/benchmark/CMSBenchmark.scala):
 
+```
 Running:
  3 Iterations
  3 Warmups per trial
@@ -68,3 +70,4 @@ Running:
 [info] CMSBenchmark.timePlusOfFirstHundredIntegersWithLongCms    0.0000001  0.005                0.2       2048           100  thrpt    3  1768.006 ± 2623.229  ops/s
 [info] CMSBenchmark.timePlusOfRandom2048BitNumbersWithBigIntCms  0.0000001  0.005                0.2       2048           100  thrpt    3   106.443 ±  201.605  ops/s
 [info] CMSBenchmark.timePlusOfRandom2048BitNumbersWithStringCms  0.0000001  0.005                0.2       2048           100  thrpt    3   107.031 ±  139.073  ops/s
+```
diff --git a/algebird-benchmark/src/main/scala/com/twitter/algebird/benchmark/Tuple4Benchmark.scala b/algebird-benchmark/src/main/scala/com/twitter/algebird/benchmark/Tuple4Benchmark.scala
@@ -0,0 +1,57 @@
+package com.twitter.algebird
+package benchmark
+
+import scala.util.Random
+import org.openjdk.jmh.annotations._
+import org.openjdk.jmh.infra.Blackhole
+
+import scala.math._
+
+object Tuple4Benchmark {
+  type Long4 = (Long, Long, Long, Long) // 4-tuple of Longs used as the benchmark element type
+  @State(Scope.Benchmark)
+  class Tuple4State {
+    /**
+     * Implicitly resolved tuple monoid; lives in `GeneratedAbstractAlgebra.scala`.
+     */
+    val tupleMonoid: Monoid[Long4] = implicitly
+
+    /**
+     * Monoid built from `Tuple4.apply`/`unapply`; lives in `GeneratedProductAlgebra.scala`.
+     */
+    val productMonoid: Monoid[Long4] =
+      Monoid[Long4, Long, Long, Long, Long](Tuple4.apply, Tuple4.unapply)
+
+    @Param(Array("10000")) // number of tuples placed in `inputData`
+    var numElements: Int = 0
+
+    var inputData: Seq[(Long, Long, Long, Long)] = _ // populated by `setup()`
+
+    private def randL: Long = Random.nextInt(1000).toLong // random value in [0, 1000)
+
+    @Setup(Level.Trial) // fills `inputData` with `numElements` random tuples
+    def setup(): Unit = {
+      inputData = Seq.fill(numElements)((randL, randL, randL, randL))
+    }
+  }
+}
+
+class Tuple4Benchmark {
+  import Tuple4Benchmark._
+
+  @Benchmark // pairwise `plus` over the input via `reduce`, using `tupleMonoid`
+  def timeTuplePlus(state: Tuple4State, bh: Blackhole) =
+    bh.consume(state.inputData.reduce(state.tupleMonoid.plus(_, _)))
+
+  @Benchmark // one `sumOption` call over the whole input, using `tupleMonoid`
+  def timeTupleSumOption(state: Tuple4State, bh: Blackhole) =
+    bh.consume(state.tupleMonoid.sumOption(state.inputData))
+
+  @Benchmark // pairwise `plus` over the input via `reduce`, using `productMonoid`
+  def timeProductPlus(state: Tuple4State, bh: Blackhole) =
+    bh.consume(state.inputData.reduce(state.productMonoid.plus(_, _)))
+
+  @Benchmark // one `sumOption` call over the whole input, using `productMonoid`
+  def timeProductSumOption(state: Tuple4State, bh: Blackhole) =
+    bh.consume(state.productMonoid.sumOption(state.inputData))
+}
diff --git a/algebird-core/src/main/scala/com/twitter/algebird/BufferedOperation.scala b/algebird-core/src/main/scala/com/twitter/algebird/BufferedOperation.scala
@@ -53,6 +53,19 @@ abstract class ArrayBufferedOperation[I, O](size: Int) extends Buffered[I, O] {
   def isFlushed = buffer.isEmpty
 }
 
+object ArrayBufferedOperation {
+  /**
+   * Returns a `BufferedReduce[T]` built on `ArrayBufferedOperation` whose
+   * `operate` delegates to the `sumOption` of the supplied `Semigroup[T]`.
+   */
+  def fromSumOption[T](size: Int)(implicit sg: Semigroup[T]): BufferedReduce[T] =
+    new ArrayBufferedOperation[T, T](size) with BufferedReduce[T] {
+      // calling `.get` is okay because the interface guarantees a
+      // non-empty sequence.
+      def operate(items: Seq[T]) = sg.sumOption(items.iterator).get
+    }
+}
+
 /**
  * This never emits on put, you must call flush
  * designed to be use in the stackable pattern with ArrayBufferedOperation