From 7e7e037b3e3efb0f1a7b5dab75d67e8f26613496 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 21 Jan 2017 00:57:03 -0500 Subject: [PATCH 1/4] added btime and belapsed macros, fixed prettymemory to use SI units --- REQUIRE | 2 +- doc/manual.md | 38 +++++++++++++----- src/BenchmarkTools.jl | 4 +- src/execution.jl | 91 +++++++++++++++++++++++++++++++++++++----- src/trials.jl | 8 ++-- test/ExecutionTests.jl | 21 ++++++++++ test/TrialsTests.jl | 12 +++--- 7 files changed, 143 insertions(+), 33 deletions(-) diff --git a/REQUIRE b/REQUIRE index 6f95768c..ab5179dd 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,3 @@ julia 0.4 -Compat 0.8.0 +Compat 0.9.5 JLD 0.6.6 diff --git a/doc/manual.md b/doc/manual.md index f360e1a3..cfd991f8 100644 --- a/doc/manual.md +++ b/doc/manual.md @@ -53,7 +53,7 @@ To quickly benchmark a Julia expression, use `@benchmark`: ```julia julia> @benchmark sin(1) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 13.00 ns (0.00% GC) @@ -77,7 +77,7 @@ julia> tune!(b); julia> run(b) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 13.00 ns (0.00% GC) @@ -91,6 +91,22 @@ BenchmarkTools.Trial: memory tolerance: 1.00% ``` +Alternatively, you can use the `@btime` or `@belapsed` macros. +These take exactly the same arguments as `@benchmark`, but +behave like the `@time` or `@elapsed` macros included with +Julia: `@btime` prints the minimum time and memory allocation +before returning the value of the expression, while `@belapsed` +returns the minimum time in seconds. 
+ +``` +julia> @btime sin(1) + 11.410 ns (0 allocations: 0 bytes) +0.8414709848078965 + +julia> @belapsed sin(1) +1.1412412412412412e-8 +``` + ### Benchmark `Parameters` You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`, and `run` to configure the execution process: @@ -128,7 +144,7 @@ You can interpolate values into `@benchmark` and `@benchmarkable` expressions: # rand(1000) is executed for each evaluation julia> @benchmark sum(rand(1000)) BenchmarkTools.Trial: - memory estimate: 7.92 kb + memory estimate: 7.92 KiB allocs estimate: 3 -------------- minimum time: 1.68 μs (0.00% GC) @@ -145,7 +161,7 @@ BenchmarkTools.Trial: # value is interpolated into the benchmark expression julia> @benchmark sum($(rand(1000))) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 185.00 ns (0.00% GC) @@ -167,7 +183,7 @@ julia> A = rand(1000); # BAD: A is a global variable in the benchmarking context julia> @benchmark [i*i for i in A] BenchmarkTools.Trial: - memory estimate: 241.63 kb + memory estimate: 241.63 KiB allocs estimate: 9960 -------------- minimum time: 856.66 μs (0.00% GC) @@ -183,7 +199,7 @@ BenchmarkTools.Trial: # GOOD: A is a constant value in the benchmarking context julia> @benchmark [i*i for i in $A] BenchmarkTools.Trial: - memory estimate: 7.89 kb + memory estimate: 7.89 KiB allocs estimate: 1 -------------- minimum time: 1.12 μs (0.00% GC) @@ -197,6 +213,8 @@ BenchmarkTools.Trial: memory tolerance: 1.00% ``` +(Note that "KiB" is the IEC binary-prefix abbreviation for a [kibibyte](https://en.wikipedia.org/wiki/Kibibyte): 1024 bytes.) 
+ Keep in mind that you can mutate external state from within a benchmark: ```julia @@ -250,7 +268,7 @@ BenchmarkTools.Benchmark{symbol("##benchmark#7556")}(BenchmarkTools.Parameters(5 julia> run(b) BenchmarkTools.Trial: - memory estimate: 0.0 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 6.76 ms (0.0% GC) @@ -275,7 +293,7 @@ It's possible for LLVM and Julia's compiler to perform optimizations on `@benchm ```julia julia> @benchmark (view(a, 1:2, 1:2); 1) setup=(a = rand(3, 3)) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 2.537 ns (0.00% GC) @@ -294,7 +312,7 @@ Note, however, that this does not mean that `view(a, 1:2, 1:2)` is non-allocatin ```julia julia> @benchmark view(a, 1:2, 1:2) setup=(a = rand(3, 3)) BenchmarkTools.Trial: - memory estimate: 64.00 bytes + memory estimate: 64 bytes allocs estimate: 1 -------------- minimum time: 17.079 ns (0.00% GC) @@ -330,7 +348,7 @@ Running a benchmark produces an instance of the `Trial` type: ```julia julia> t = @benchmark eig(rand(10, 10)) BenchmarkTools.Trial: - memory estimate: 18.84 kb + memory estimate: 18.84 KiB allocs estimate: 70 -------------- minimum time: 167.17 μs (0.00% GC) diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index df3885e3..028b66b3 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -64,7 +64,9 @@ include("execution.jl") export tune!, warmup, @benchmark, - @benchmarkable + @benchmarkable, + @belapsed, + @btime ########################################## # Plotting Facilities (loaded on demand) # diff --git a/src/execution.jl b/src/execution.jl index 8123bd09..df35958a 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -28,10 +28,14 @@ end sample(b::Benchmark, args...) = error("no execution method defined on type $(typeof(b))") _run(b::Benchmark, args...; kwargs...) 
= error("no execution method defined on type $(typeof(b))") -function Base.run(b::Benchmark, p::Parameters = b.params; kwargs...) +# return (Trial, result) tuple, where result is the result of the benchmarked expression +function run_result(b::Benchmark, p::Parameters = b.params; kwargs...) return eval(current_module(), :(BenchmarkTools._run($(b), $(p); $(kwargs...)))) end +Base.run(b::Benchmark, p::Parameters = b.params; kwargs...) = + run_result(b, p; kwargs...)[1] + function Base.run(group::BenchmarkGroup, args...; verbose::Bool = false, pad = "", kwargs...) result = similar(group) gcscrub() # run GC before running group, even if individual benchmarks don't manually GC @@ -202,7 +206,7 @@ macro benchmark(args...) end) end -macro benchmarkable(args...) +function benchmarkable_parts(args) core, params = prunekwargs(args...) # extract setup/teardown if present, removing them from the original expression @@ -228,6 +232,12 @@ macro benchmarkable(args...) end end + return core, setup, teardown, params +end + +macro benchmarkable(args...) + core, setup, teardown, params = benchmarkable_parts(args) + # extract any variable bindings shared between the core and setup expressions setup_vars = isa(setup, Expr) ? collectvars(setup) : [] core_vars = isa(core, Expr) ? 
collectvars(core) : [] @@ -278,7 +288,8 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, __evals = __params.evals __gc_start = Base.gc_num() __start_time = time_ns() - for __iter in 1:__evals + __return_val = $(invocation) + for __iter in 2:__evals $(invocation) end __sample_time = time_ns() - __start_time @@ -290,7 +301,7 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, __allocs = Int(fld(__gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc, __evals)) - return __time, __gctime, __memory, __allocs + return __time, __gctime, __memory, __allocs, __return_val end function BenchmarkTools.sample(b::BenchmarkTools.Benchmark{$(id)}, p::BenchmarkTools.Parameters = b.params) @@ -304,15 +315,73 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, params.gctrial && BenchmarkTools.gcscrub() start_time = time() trial = BenchmarkTools.Trial(params) - iters = 1 - while (time() - start_time) < params.seconds - params.gcsample && BenchmarkTools.gcscrub() - push!(trial, $(samplefunc)(params)...) - iters += 1 - iters > params.samples && break + params.gcsample && BenchmarkTools.gcscrub() + s = $(samplefunc)(params) + push!(trial, s[1:end-1]...) + return_val = s[end] + iters = 2 + while (time() - start_time) < params.seconds && iters ≤ params.samples + params.gcsample && BenchmarkTools.gcscrub() + params.gcsample && BenchmarkTools.gcscrub() + push!(trial, $(samplefunc)(params)[1:end-1]...) + iters += 1 end - return sort!(trial) + return sort!(trial), return_val end BenchmarkTools.Benchmark{$(id)}($(params)) end) end + +###################### +# convenience macros # +###################### + +# These macros provide drop-in replacements for the +# Base.@time and Base.@elapsed macros, which use +# @benchmark but yield only the minimum time. + +""" + @belapsed expression [other parameters...] 
+ +Similar to the `@elapsed` macro included with Julia, +this returns the elapsed time (in seconds) to +execute a given expression. It uses the `@benchmark` +macro, however, and accepts all of the same additional +parameters as `@benchmark`. The returned time +is the *minimum* elapsed time measured during the benchmark. +""" +macro belapsed(args...) + b = Expr(:macrocall, Symbol("@benchmark"), map(esc, args)...) + :(time(minimum($b))/1e9) +end + +""" + @btime expression [other parameters...] + +Similar to the `@time` macro included with Julia, +this executes an expression, printing the time +it took to execute and the memory allocated before +returning the value of the expression. + +Unlike `@time`, it uses the `@benchmark` +macro, and accepts all of the same additional +parameters as `@benchmark`. The printed time +is the *minimum* elapsed time measured during the benchmark. +""" +macro btime(args...) + tmp = gensym() + _, params = prunekwargs(args...) + tune_expr = hasevals(params) ? :() : :(BenchmarkTools.tune!($(tmp))) + return esc(quote + $(tmp) = BenchmarkTools.@benchmarkable $(args...) + BenchmarkTools.warmup($(tmp)) + $(tune_expr) + b, val = BenchmarkTools.run_result($(tmp)) + bmin = minimum(b) + a = allocs(bmin) + println(" ", BenchmarkTools.prettytime(BenchmarkTools.time(bmin)), + " ($a allocation", a == 1 ? 
"" : "s", ": ", + BenchmarkTools.prettymemory(BenchmarkTools.memory(bmin)), ")") + val + end) +end diff --git a/src/trials.jl b/src/trials.jl index 689e674d..f5063ca7 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -251,13 +251,13 @@ end function prettymemory(b) if b < 1024 - value, units = b, "bytes" + return string(b, " bytes") elseif b < 1024^2 - value, units = b / 1024, "kb" + value, units = b / 1024, "KiB" elseif b < 1024^3 - value, units = b / 1024^2, "mb" + value, units = b / 1024^2, "MiB" else - value, units = b / 1024^3, "gb" + value, units = b / 1024^3, "GiB" end return string(@sprintf("%.2f", value), " ", units) end diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 809e883d..29647e00 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -2,6 +2,8 @@ module ExecutionTests using Base.Test using BenchmarkTools +using Compat +import Compat.String seteq(a, b) = length(a) == length(b) == length(intersect(a, b)) @@ -151,4 +153,23 @@ tune!(b) [u^2 for u in [1,2,3]] end) +# this should take < 1 µs on any sane machine +@test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1e-6 + +let fname = tempname() + try + ret = open(fname, "w") do f + redirect_stdout(f) do + x = 1 + y = @btime(sin($x)) + @test y == sin(1) + end + end + s = readstring(fname) + @test ismatch(r"[0-9.]+ ns \(0 allocations: 0 bytes\)", s) + finally + isfile(fname) && rm(fname) + end +end + end # module diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl index 3c235dae..5b8cbc2a 100644 --- a/test/TrialsTests.jl +++ b/test/TrialsTests.jl @@ -161,11 +161,11 @@ tj_r_2 = judge(tr; time_tolerance = 2.0, memory_tolerance = 2.0) @test BenchmarkTools.prettytime(999_999_999) == "1000.000 ms" @test BenchmarkTools.prettytime(1_000_000_000) == "1.000 s" -@test BenchmarkTools.prettymemory(1023) == "1023.00 bytes" -@test BenchmarkTools.prettymemory(1024) == "1.00 kb" -@test BenchmarkTools.prettymemory(1048575) == "1024.00 kb" -@test 
BenchmarkTools.prettymemory(1048576) == "1.00 mb" -@test BenchmarkTools.prettymemory(1073741823) == "1024.00 mb" -@test BenchmarkTools.prettymemory(1073741824) == "1.00 gb" +@test BenchmarkTools.prettymemory(1023) == "1023 bytes" +@test BenchmarkTools.prettymemory(1024) == "1.00 KiB" +@test BenchmarkTools.prettymemory(1048575) == "1024.00 KiB" +@test BenchmarkTools.prettymemory(1048576) == "1.00 MiB" +@test BenchmarkTools.prettymemory(1073741823) == "1024.00 MiB" +@test BenchmarkTools.prettymemory(1073741824) == "1.00 GiB" end # module From 7b157a4c41ad36e6d279770566827bbb0d959765 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 21 Jan 2017 07:59:39 -0500 Subject: [PATCH 2/4] more test output on at-btime test --- test/ExecutionTests.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 29647e00..b2aec7bf 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -153,8 +153,8 @@ tune!(b) [u^2 for u in [1,2,3]] end) -# this should take < 1 µs on any sane machine -@test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1e-6 +# this should take < 1 s on any sane machine +@test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1 let fname = tempname() try @@ -166,7 +166,12 @@ let fname = tempname() end end s = readstring(fname) - @test ismatch(r"[0-9.]+ ns \(0 allocations: 0 bytes\)", s) + try + @test ismatch(r"[0-9.]+ \w*s \(0 allocations: 0 bytes\)", s) + catch + println(STDERR, "@btime output didn't match ", repr(s)) + rethrow() + end finally isfile(fname) && rm(fname) end From 4ce6f49aff703d9d369cb793c8dacf5a29cdd8fc Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Sat, 21 Jan 2017 09:14:16 -0500 Subject: [PATCH 3/4] more inclusive pattern in btime test --- test/ExecutionTests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index b2aec7bf..d0c9ac0c 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -167,7 +167,7 @@ let fname = tempname() end s = readstring(fname) try - @test ismatch(r"[0-9.]+ \w*s \(0 allocations: 0 bytes\)", s) + @test ismatch(r"[0-9.]+ \w*s \([0-9]* allocations?: [0-9]+ bytes\)", s) catch println(STDERR, "@btime output didn't match ", repr(s)) rethrow() From 282a541de687f5a1a9cd598bf7cb5c7d83cfcb45 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 21 Jan 2017 13:42:59 -0500 Subject: [PATCH 4/4] rm duplication --- src/execution.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/execution.jl b/src/execution.jl index df35958a..383aef97 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -321,7 +321,6 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, return_val = s[end] iters = 2 while (time() - start_time) < params.seconds && iters ≤ params.samples - params.gcsample && BenchmarkTools.gcscrub() params.gcsample && BenchmarkTools.gcscrub() push!(trial, $(samplefunc)(params)[1:end-1]...) iters += 1