From 7e7e037b3e3efb0f1a7b5dab75d67e8f26613496 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 21 Jan 2017 00:57:03 -0500 Subject: [PATCH 1/4] added btime and belapsed macros, fixed prettymemory to use SI units --- REQUIRE | 2 +- doc/manual.md | 38 +++++++++++++----- src/BenchmarkTools.jl | 4 +- src/execution.jl | 91 +++++++++++++++++++++++++++++++++++++----- src/trials.jl | 8 ++-- test/ExecutionTests.jl | 21 ++++++++++ test/TrialsTests.jl | 12 +++--- 7 files changed, 143 insertions(+), 33 deletions(-) diff --git a/REQUIRE b/REQUIRE index 6f95768c..ab5179dd 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,3 @@ julia 0.4 -Compat 0.8.0 +Compat 0.9.5 JLD 0.6.6 diff --git a/doc/manual.md b/doc/manual.md index f360e1a3..cfd991f8 100644 --- a/doc/manual.md +++ b/doc/manual.md @@ -53,7 +53,7 @@ To quickly benchmark a Julia expression, use `@benchmark`: ```julia julia> @benchmark sin(1) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 13.00 ns (0.00% GC) @@ -77,7 +77,7 @@ julia> tune!(b); julia> run(b) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 13.00 ns (0.00% GC) @@ -91,6 +91,22 @@ BenchmarkTools.Trial: memory tolerance: 1.00% ``` +Alternatively, you can use the `@btime` or `@belapsed` macros. +These take exactly the same arguments as `@benchmark`, but +behave like the `@time` or `@elapsed` macros included with +Julia: `@btime` prints the minimum time and memory allocation +before returning the value of the expression, while `@belapsed` +returns the minimum time in seconds. 
+ +``` +julia> @btime sin(1) + 11.410 ns (0 allocations: 0 bytes) +0.8414709848078965 + +julia> @belapsed sin(1) +1.1412412412412412e-8 +``` + ### Benchmark `Parameters` You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`, and `run` to configure the execution process: @@ -128,7 +144,7 @@ You can interpolate values into `@benchmark` and `@benchmarkable` expressions: # rand(1000) is executed for each evaluation julia> @benchmark sum(rand(1000)) BenchmarkTools.Trial: - memory estimate: 7.92 kb + memory estimate: 7.92 KiB allocs estimate: 3 -------------- minimum time: 1.68 μs (0.00% GC) @@ -145,7 +161,7 @@ BenchmarkTools.Trial: # value is interpolated into the benchmark expression julia> @benchmark sum($(rand(1000))) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 185.00 ns (0.00% GC) @@ -167,7 +183,7 @@ julia> A = rand(1000); # BAD: A is a global variable in the benchmarking context julia> @benchmark [i*i for i in A] BenchmarkTools.Trial: - memory estimate: 241.63 kb + memory estimate: 241.63 KiB allocs estimate: 9960 -------------- minimum time: 856.66 μs (0.00% GC) @@ -183,7 +199,7 @@ BenchmarkTools.Trial: # GOOD: A is a constant value in the benchmarking context julia> @benchmark [i*i for i in $A] BenchmarkTools.Trial: - memory estimate: 7.89 kb + memory estimate: 7.89 KiB allocs estimate: 1 -------------- minimum time: 1.12 μs (0.00% GC) @@ -197,6 +213,8 @@ BenchmarkTools.Trial: memory tolerance: 1.00% ``` +(Note that "KiB" is the IEC binary-prefix abbreviation for a [kibibyte](https://en.wikipedia.org/wiki/Kibibyte): 1024 bytes.) 
+ Keep in mind that you can mutate external state from within a benchmark: ```julia @@ -250,7 +268,7 @@ BenchmarkTools.Benchmark{symbol("##benchmark#7556")}(BenchmarkTools.Parameters(5 julia> run(b) BenchmarkTools.Trial: - memory estimate: 0.0 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 6.76 ms (0.0% GC) @@ -275,7 +293,7 @@ It's possible for LLVM and Julia's compiler to perform optimizations on `@benchm ```julia julia> @benchmark (view(a, 1:2, 1:2); 1) setup=(a = rand(3, 3)) BenchmarkTools.Trial: - memory estimate: 0.00 bytes + memory estimate: 0 bytes allocs estimate: 0 -------------- minimum time: 2.537 ns (0.00% GC) @@ -294,7 +312,7 @@ Note, however, that this does not mean that `view(a, 1:2, 1:2)` is non-allocatin ```julia julia> @benchmark view(a, 1:2, 1:2) setup=(a = rand(3, 3)) BenchmarkTools.Trial: - memory estimate: 64.00 bytes + memory estimate: 64 bytes allocs estimate: 1 -------------- minimum time: 17.079 ns (0.00% GC) @@ -330,7 +348,7 @@ Running a benchmark produces an instance of the `Trial` type: ```julia julia> t = @benchmark eig(rand(10, 10)) BenchmarkTools.Trial: - memory estimate: 18.84 kb + memory estimate: 18.84 KiB allocs estimate: 70 -------------- minimum time: 167.17 μs (0.00% GC) diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index df3885e3..028b66b3 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -64,7 +64,9 @@ include("execution.jl") export tune!, warmup, @benchmark, - @benchmarkable + @benchmarkable, + @belapsed, + @btime ########################################## # Plotting Facilities (loaded on demand) # diff --git a/src/execution.jl b/src/execution.jl index 8123bd09..df35958a 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -28,10 +28,14 @@ end sample(b::Benchmark, args...) = error("no execution method defined on type $(typeof(b))") _run(b::Benchmark, args...; kwargs...) 
= error("no execution method defined on type $(typeof(b))") -function Base.run(b::Benchmark, p::Parameters = b.params; kwargs...) +# return (Trial, result) tuple, where result is the result of the benchmarked expression +function run_result(b::Benchmark, p::Parameters = b.params; kwargs...) return eval(current_module(), :(BenchmarkTools._run($(b), $(p); $(kwargs...)))) end +Base.run(b::Benchmark, p::Parameters = b.params; kwargs...) = + run_result(b, p; kwargs...)[1] + function Base.run(group::BenchmarkGroup, args...; verbose::Bool = false, pad = "", kwargs...) result = similar(group) gcscrub() # run GC before running group, even if individual benchmarks don't manually GC @@ -202,7 +206,7 @@ macro benchmark(args...) end) end -macro benchmarkable(args...) +function benchmarkable_parts(args) core, params = prunekwargs(args...) # extract setup/teardown if present, removing them from the original expression @@ -228,6 +232,12 @@ macro benchmarkable(args...) end end + return core, setup, teardown, params +end + +macro benchmarkable(args...) + core, setup, teardown, params = benchmarkable_parts(args) + # extract any variable bindings shared between the core and setup expressions setup_vars = isa(setup, Expr) ? collectvars(setup) : [] core_vars = isa(core, Expr) ? 
collectvars(core) : [] @@ -278,7 +288,8 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, __evals = __params.evals __gc_start = Base.gc_num() __start_time = time_ns() - for __iter in 1:__evals + __return_val = $(invocation) + for __iter in 2:__evals $(invocation) end __sample_time = time_ns() - __start_time @@ -290,7 +301,7 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, __allocs = Int(fld(__gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc, __evals)) - return __time, __gctime, __memory, __allocs + return __time, __gctime, __memory, __allocs, __return_val end function BenchmarkTools.sample(b::BenchmarkTools.Benchmark{$(id)}, p::BenchmarkTools.Parameters = b.params) @@ -304,15 +315,73 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, params.gctrial && BenchmarkTools.gcscrub() start_time = time() trial = BenchmarkTools.Trial(params) - iters = 1 - while (time() - start_time) < params.seconds - params.gcsample && BenchmarkTools.gcscrub() - push!(trial, $(samplefunc)(params)...) - iters += 1 - iters > params.samples && break + params.gcsample && BenchmarkTools.gcscrub() + s = $(samplefunc)(params) + push!(trial, s[1:end-1]...) + return_val = s[end] + iters = 2 + while (time() - start_time) < params.seconds && iters ≤ params.samples + params.gcsample && BenchmarkTools.gcscrub() + params.gcsample && BenchmarkTools.gcscrub() + push!(trial, $(samplefunc)(params)[1:end-1]...) + iters += 1 end - return sort!(trial) + return sort!(trial), return_val end BenchmarkTools.Benchmark{$(id)}($(params)) end) end + +###################### +# convenience macros # +###################### + +# These macros provide drop-in replacements for the +# Base.@time and Base.@elapsed macros, which use +# @benchmark but yield only the minimum time. + +""" + @belapsed expression [other parameters...] 
+ +Similar to the `@elapsed` macro included with Julia, +this returns the elapsed time (in seconds) to +execute a given expression. It uses the `@benchmark` +macro, however, and accepts all of the same additional +parameters as `@benchmark`. The returned time +is the *minimum* elapsed time measured during the benchmark. +""" +macro belapsed(args...) + b = Expr(:macrocall, Symbol("@benchmark"), map(esc, args)...) + :(time(minimum($b))/1e9) +end + +""" + @btime expression [other parameters...] + +Similar to the `@time` macro included with Julia, +this executes an expression, printing the time +it took to execute and the memory allocated before +returning the value of the expression. + +Unlike `@time`, it uses the `@benchmark` +macro, and accepts all of the same additional +parameters as `@benchmark`. The printed time +is the *minimum* elapsed time measured during the benchmark. +""" +macro btime(args...) + tmp = gensym() + _, params = prunekwargs(args...) + tune_expr = hasevals(params) ? :() : :(BenchmarkTools.tune!($(tmp))) + return esc(quote + $(tmp) = BenchmarkTools.@benchmarkable $(args...) + BenchmarkTools.warmup($(tmp)) + $(tune_expr) + b, val = BenchmarkTools.run_result($(tmp)) + bmin = minimum(b) + a = allocs(bmin) + println(" ", BenchmarkTools.prettytime(BenchmarkTools.time(bmin)), + " ($a allocation", a == 1 ? 
"" : "s", ": ", + BenchmarkTools.prettymemory(BenchmarkTools.memory(bmin)), ")") + val + end) +end diff --git a/src/trials.jl b/src/trials.jl index 689e674d..f5063ca7 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -251,13 +251,13 @@ end function prettymemory(b) if b < 1024 - value, units = b, "bytes" + return string(b, " bytes") elseif b < 1024^2 - value, units = b / 1024, "kb" + value, units = b / 1024, "KiB" elseif b < 1024^3 - value, units = b / 1024^2, "mb" + value, units = b / 1024^2, "MiB" else - value, units = b / 1024^3, "gb" + value, units = b / 1024^3, "GiB" end return string(@sprintf("%.2f", value), " ", units) end diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 809e883d..29647e00 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -2,6 +2,8 @@ module ExecutionTests using Base.Test using BenchmarkTools +using Compat +import Compat.String seteq(a, b) = length(a) == length(b) == length(intersect(a, b)) @@ -151,4 +153,23 @@ tune!(b) [u^2 for u in [1,2,3]] end) +# this should take < 1 µs on any sane machine +@test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1e-6 + +let fname = tempname() + try + ret = open(fname, "w") do f + redirect_stdout(f) do + x = 1 + y = @btime(sin($x)) + @test y == sin(1) + end + end + s = readstring(fname) + @test ismatch(r"[0-9.]+ ns \(0 allocations: 0 bytes\)", s) + finally + isfile(fname) && rm(fname) + end +end + end # module diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl index 3c235dae..5b8cbc2a 100644 --- a/test/TrialsTests.jl +++ b/test/TrialsTests.jl @@ -161,11 +161,11 @@ tj_r_2 = judge(tr; time_tolerance = 2.0, memory_tolerance = 2.0) @test BenchmarkTools.prettytime(999_999_999) == "1000.000 ms" @test BenchmarkTools.prettytime(1_000_000_000) == "1.000 s" -@test BenchmarkTools.prettymemory(1023) == "1023.00 bytes" -@test BenchmarkTools.prettymemory(1024) == "1.00 kb" -@test BenchmarkTools.prettymemory(1048575) == "1024.00 kb" -@test 
BenchmarkTools.prettymemory(1048576) == "1.00 mb" -@test BenchmarkTools.prettymemory(1073741823) == "1024.00 mb" -@test BenchmarkTools.prettymemory(1073741824) == "1.00 gb" +@test BenchmarkTools.prettymemory(1023) == "1023 bytes" +@test BenchmarkTools.prettymemory(1024) == "1.00 KiB" +@test BenchmarkTools.prettymemory(1048575) == "1024.00 KiB" +@test BenchmarkTools.prettymemory(1048576) == "1.00 MiB" +@test BenchmarkTools.prettymemory(1073741823) == "1024.00 MiB" +@test BenchmarkTools.prettymemory(1073741824) == "1.00 GiB" end # module From 7b157a4c41ad36e6d279770566827bbb0d959765 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 21 Jan 2017 07:59:39 -0500 Subject: [PATCH 2/4] more test output on at-btime test --- test/ExecutionTests.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 29647e00..b2aec7bf 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -153,8 +153,8 @@ tune!(b) [u^2 for u in [1,2,3]] end) -# this should take < 1 µs on any sane machine -@test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1e-6 +# this should take < 1 s on any sane machine +@test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1 let fname = tempname() try @@ -166,7 +166,12 @@ let fname = tempname() end end s = readstring(fname) - @test ismatch(r"[0-9.]+ ns \(0 allocations: 0 bytes\)", s) + try + @test ismatch(r"[0-9.]+ \w*s \(0 allocations: 0 bytes\)", s) + catch + println(STDERR, "@btime output didn't match ", repr(s)) + rethrow() + end finally isfile(fname) && rm(fname) end From 4ce6f49aff703d9d369cb793c8dacf5a29cdd8fc Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Sat, 21 Jan 2017 09:14:16 -0500 Subject: [PATCH 3/4] more inclusive pattern in btime test --- test/ExecutionTests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index b2aec7bf..d0c9ac0c 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -167,7 +167,7 @@ let fname = tempname() end s = readstring(fname) try - @test ismatch(r"[0-9.]+ \w*s \(0 allocations: 0 bytes\)", s) + @test ismatch(r"[0-9.]+ \w*s \([0-9]* allocations?: [0-9]+ bytes\)", s) catch println(STDERR, "@btime output didn't match ", repr(s)) rethrow() From 282a541de687f5a1a9cd598bf7cb5c7d83cfcb45 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 21 Jan 2017 13:42:59 -0500 Subject: [PATCH 4/4] rm duplication --- src/execution.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/execution.jl b/src/execution.jl index df35958a..383aef97 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -321,7 +321,6 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, return_val = s[end] iters = 2 while (time() - start_time) < params.seconds && iters ≤ params.samples - params.gcsample && BenchmarkTools.gcscrub() params.gcsample && BenchmarkTools.gcscrub() push!(trial, $(samplefunc)(params)[1:end-1]...) iters += 1