-
Notifications
You must be signed in to change notification settings - Fork 534
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Third optimization batch — map fusion #95
Changes from all commits
d20b54e
785ee2b
174ec54
c23f92c
540c879
7bde17f
b553dad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Copyright 2017 Typelevel | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package cats.effect.benchmarks | ||
|
||
import java.util.concurrent.TimeUnit | ||
import cats.effect.IO | ||
import org.openjdk.jmh.annotations._ | ||
|
||
/** To do comparative benchmarks between versions: | ||
* | ||
* benchmarks/run-benchmark MapCallsBenchmark | ||
* | ||
* This will generate results in `benchmarks/results`. | ||
* | ||
* Or to run the benchmark from within SBT: | ||
* | ||
* jmh:run -i 10 -wi 10 -f 2 -t 1 cats.effect.benchmarks.MapCallsBenchmark | ||
* | ||
* Which means "10 iterations", "10 warm-up iterations", "2 forks", "1 thread". | ||
 * Please note that benchmarks should usually be executed in at least | ||
 * 10 iterations (as a rule of thumb), but more is better. | ||
*/ | ||
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.Throughput))
@OutputTimeUnit(TimeUnit.SECONDS)
class MapCallsBenchmark {
  import MapCallsBenchmark.test

  // Every benchmark performs the same total number of map evaluations
  // (12000); only the batch size (fused map depth) differs between them.
  private[this] val totalOps = 12000

  /** Baseline: no fusion, one map per run. */
  @Benchmark
  def one(): Long = test(totalOps, 1)

  /** 30 maps fused per run. */
  @Benchmark
  def batch30(): Long = test(totalOps / 30, 30)

  /** 120 maps fused per run. */
  @Benchmark
  def batch120(): Long = test(totalOps / 120, 120)
}
|
||
object MapCallsBenchmark {

  /** Builds an `IO(0)` with `batch` chained `.map(_ + 1)` calls, then
    * executes the resulting chain `iterations` times, summing the results.
    *
    * The return value is `iterations * batch` (each run yields `batch`),
    * which keeps the JVM from dead-code-eliminating the work.
    */
  def test(iterations: Int, batch: Int): Long = {
    val increment = (n: Int) => n + 1

    // Layer `batch` map operations onto the source; this is the chain
    // that map fusion is expected to optimize.
    val io = (0 until batch).foldLeft(IO(0))((acc, _) => acc.map(increment))

    // Hot loop: run the fused chain `iterations` times. A while loop keeps
    // iteration overhead out of the measurement.
    var total = 0L
    var run = 0
    while (run < iterations) {
      total += io.unsafeRunSync()
      run += 1
    }
    total
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
/* | ||
* Copyright 2017 Typelevel | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package cats.effect.benchmarks | ||
|
||
import java.util.concurrent.TimeUnit | ||
import cats.effect.IO | ||
import org.openjdk.jmh.annotations._ | ||
|
||
/** To do comparative benchmarks between versions: | ||
* | ||
* benchmarks/run-benchmark MapStreamBenchmark | ||
* | ||
* This will generate results in `benchmarks/results`. | ||
* | ||
* Or to run the benchmark from within SBT: | ||
* | ||
* jmh:run -i 10 -wi 10 -f 2 -t 1 cats.effect.benchmarks.MapStreamBenchmark | ||
* | ||
* Which means "10 iterations", "10 warm-up iterations", "2 forks", "1 thread". | ||
 * Please note that benchmarks should usually be executed in at least | ||
 * 10 iterations (as a rule of thumb), but more is better. | ||
*/ | ||
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.Throughput))
@OutputTimeUnit(TimeUnit.SECONDS)
class MapStreamBenchmark {
  import MapStreamBenchmark.streamTest

  /** Baseline: 12000 elements, a single map layer. */
  @Benchmark
  def one(): Long = streamTest(12000, 1)

  /** 1000 elements, 30 map layers each. */
  @Benchmark
  def batch30(): Long = streamTest(1000, 30)

  /** 100 elements, 120 map layers each. */
  @Benchmark
  def batch120(): Long = streamTest(100, 120)
}
|
||
object MapStreamBenchmark {

  /** Builds a lazy stream of `times` integers, layers `batchSize` map
    * transformations over it, then sums every element to a `Long`.
    */
  def streamTest(times: Int, batchSize: Int): Long = {
    val source = range(0, times)
    val mapped =
      (0 until batchSize).foldLeft(source)((s, _) => mapStream(addOne)(s))
    sum(0)(mapped).unsafeRunSync()
  }

  /** A cons cell of a lazily evaluated linked stream, tail suspended in `IO`. */
  final case class Stream(value: Int, next: IO[Option[Stream]])

  // The transformation that `streamTest` layers onto the stream.
  val addOne = (x: Int) => x + 1

  /** Lazily generates the integers in `[from, until)`; `None` when empty. */
  def range(from: Int, until: Int): Option[Stream] =
    if (from >= until) None
    else Some(Stream(from, IO(range(from + 1, until))))

  /** Applies `f` to every element, deferring the tail via `IO#map` —
    * the call whose fusion this benchmark measures.
    */
  def mapStream(f: Int => Int)(box: Option[Stream]): Option[Stream] =
    box.map {
      case Stream(value, next) =>
        Stream(f(value), next.map(mapStream(f)))
    }

  /** Folds the stream into `acc`, sequencing tails with `IO#flatMap`. */
  def sum(acc: Long)(box: Option[Stream]): IO[Long] =
    box match {
      case None =>
        IO.pure(acc)
      case Some(Stream(value, next)) =>
        next.flatMap(sum(acc + value))
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,7 @@ import cats.effect.IO | |
|
||
import scala.concurrent.blocking | ||
import scala.concurrent.duration.{Duration, FiniteDuration} | ||
import scala.util.Either | ||
import scala.util.{Either, Try} | ||
|
||
private[effect] object IOPlatform { | ||
/** | ||
|
@@ -84,4 +84,35 @@ private[effect] object IOPlatform { | |
true | ||
} | ||
} | ||
|
||
/** | ||
* Establishes the maximum stack depth for `IO#map` operations. | ||
* | ||
* The default is `128`, from which we substract one as an | ||
* optimization. This default has been reached like this: | ||
* | ||
* - according to official docs, the default stack size on 32-bits | ||
* Windows and Linux was 320 KB, whereas for 64-bits it is 1024 KB | ||
* - according to measurements chaining `Function1` references uses | ||
* approximately 32 bytes of stack space on a 64 bits system; | ||
* this could be lower if "compressed oops" is activated | ||
* - therefore a "map fusion" that goes 128 in stack depth can use | ||
* about 4 KB of stack space | ||
* | ||
* If this parameter becomes a problem, it can be tuned by setting | ||
* the `cats.effect.fusionMaxStackDepth` environment variable when | ||
* executing the Java VM: | ||
* | ||
* <pre> | ||
* java -Dcats.effect.fusionMaxStackDepth=32 \ | ||
* ... | ||
* </pre> | ||
*/ | ||
private[effect] final val fusionMaxStackDepth = | ||
Option(System.getProperty("cats.effect.fusionMaxStackDepth", "")) | ||
.filter(s => s != null && s.nonEmpty) | ||
.flatMap(s => Try(s.toInt).toOption) | ||
.filter(_ > 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just confirming what this looks like at There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that's the intention — which made me realize that when that counter gets reset we should use a |
||
.map(_ - 1) | ||
.getOrElse(127) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't want to depend on a logger, but is it worth it to explain on stderr why we choked?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer to not do it, since it introduces extra code — but I don't care much and if it's a popular demand, then OK.
What I'm thinking is that people won't modify this parameter unless they are in big trouble and we can have two issues:
So increasing it won't increase performance unless used for very narrow use-cases and if people hit the stack limit because of this default, then we probably need to lower this limit in the library, with the overriding option being made available only to empower people to fix it without having to wait for another release.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I'll buy that.