Skip to content

Commit

Permalink
WIP: initial benchmark example using manifest spec
Browse files Browse the repository at this point in the history
  • Loading branch information
johnnychen94 committed Feb 13, 2022
1 parent 653f02a commit 9006036
Show file tree
Hide file tree
Showing 12 changed files with 188 additions and 3 deletions.
6 changes: 3 additions & 3 deletions docs/src/dialects/manifest.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ id = "1"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Julia"]
deps = ["scripts/julia"]
outs = ["@__STDOUT__"]
runner = "julia"
runner = "juliamodule"
[tasks.run]
script = "scripts/julia/sum.jl"

Expand All @@ -26,7 +26,7 @@ id = "2"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Julia"]
deps = ["scripts/julia"]
outs = ["@__STDOUT__"]
runner = "julia"
runner = "juliamodule"
[tasks.run]
script = "scripts/julia/rand.jl"

Expand All @@ -36,7 +36,7 @@ id = "3"
groups = ["Type:LinearAlgebra", "Type:Benchmark"]
deps = ["@__STDOUT__"]
outs = ["reports/results.csv"]
runner = "julia"
runner = "juliamodule"
[tasks.run]
script = "summary.jl"
```
Expand Down
1 change: 1 addition & 0 deletions examples/manifest/benchmark/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
reports/
8 changes: 8 additions & 0 deletions examples/manifest/benchmark/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
# Statistics provides `mean`, used by scripts/julia/utils.jl
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Workflows = "115008b9-7a42-4cba-af26-8bebb992e909"
6 changes: 6 additions & 0 deletions examples/manifest/benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmark example with the manifest dialect

This demo benchmarks Julia against NumPy on a few functions (`sum`, `rand`, and `randn`)
using the scripts under `scripts/`.

The `numpy` package must be installed for the Python executable that runs the NumPy scripts.
58 changes: 58 additions & 0 deletions examples/manifest/benchmark/benchmark.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
version = "0.1"
dialect = "manifest"

# Stage 1 runs the four benchmark tasks; stage 2 summarizes their outputs.
order = [["1", "2", "3", "4"], ["5"]]

[[tasks]]
name = "sum"
id = "1"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Julia"]
deps = ["scripts/julia"]
runner = "juliamodule"
[tasks.run]
script = "scripts/julia/sum.jl"

[[tasks]]
name = "rand"
id = "2"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Julia"]
deps = ["scripts/julia"]
runner = "juliamodule"
[tasks.run]
script = "scripts/julia/rand.jl"

[[tasks]]
name = "sum"
id = "3"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Numpy"]
# NOTE(review): the NumPy scripts live under scripts/numpy (see tasks.run
# below); the previous "scripts/python" path does not exist in this commit.
deps = ["scripts/numpy"]
runner = "shell"
[tasks.run]
command = "python scripts/numpy/sum.py"
capture = true

[[tasks]]
name = "randn"
id = "4"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Numpy"]
# Same path fix as task 3: the script is at scripts/numpy/randn.py.
deps = ["scripts/numpy"]
runner = "shell"
[tasks.run]
command = "python scripts/numpy/randn.py"
capture = true

[[tasks]]
name = "summary"
id = "5"
groups = ["Type:LinearAlgebra", "Type:Benchmark"]
# Depends on the captured stdout of tasks 1-4 plus the summary script itself.
deps = [
    "summary.jl",
    "@__STDOUT__1",
    "@__STDOUT__2",
    "@__STDOUT__3",
    "@__STDOUT__4",
]
outs = ["reports"]
runner = "shell"
[tasks.run]
command = "julia --startup=no --project=. summary.jl"
12 changes: 12 additions & 0 deletions examples/manifest/benchmark/scripts/julia/rand.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using LinearAlgebra
using BenchmarkTools
using JSON3
include(joinpath(@__DIR__, "utils.jl"))

# Benchmark `rand(n)` over a range of vector lengths and leave the JSON string
# as the script's final expression (collected by the workflow runner).
results = Dict()
for len in (64, 128, 256, 512, 1024, 2048)
    trial = @benchmark rand($len) samples=100 evals=1
    results[len] = trial_to_dict(trial)
end

JSON3.write(results)
13 changes: 13 additions & 0 deletions examples/manifest/benchmark/scripts/julia/sum.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using LinearAlgebra
using BenchmarkTools
using JSON3
include(joinpath(@__DIR__, "utils.jl"))

# Benchmark `sum` on random vectors of increasing length and leave the JSON
# string as the script's final expression (collected by the workflow runner).
results = Dict()
for len in (64, 128, 256, 512, 1024, 2048, 4096, 8192)
    v = rand(len)
    trial = @benchmark sum($v) samples=100 evals=1
    results[len] = trial_to_dict(trial)
end

JSON3.write(results)
10 changes: 10 additions & 0 deletions examples/manifest/benchmark/scripts/julia/utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using BenchmarkTools
# `mean` lives in the Statistics stdlib; it is exported by neither Base nor
# BenchmarkTools, so without this import the calls below raise UndefVarError.
using Statistics: mean

"""
    trial_to_dict(trial::BenchmarkTools.Trial) -> Dict{String,Float64}

Flatten a benchmark `Trial` into a Dict with the mean run time (`"time"`, ns),
mean GC time (`"gctimes"`, ns), allocation count (`"allocs"`), and allocated
bytes (`"memory"`), all stored as `Float64` so the result serializes uniformly.
"""
function trial_to_dict(trial::BenchmarkTools.Trial)
    d = Dict{String, Float64}()
    d["time"] = mean(trial.times)
    d["gctimes"] = mean(trial.gctimes)
    d["allocs"] = trial.allocs
    d["memory"] = trial.memory
    return d
end
9 changes: 9 additions & 0 deletions examples/manifest/benchmark/scripts/numpy/randn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import numpy as np
import json
from utils import belapsed

# Time np.random.randn for several vector sizes and print one JSON object
# mapping size -> {"time": mean seconds per call}.
rst: dict = {
    n: {"time": belapsed(lambda: np.random.randn(n), number=100)}
    for n in (64, 128, 256, 512, 1024, 2048)
}

print(json.dumps(rst))
10 changes: 10 additions & 0 deletions examples/manifest/benchmark/scripts/numpy/sum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import numpy as np
import json
from utils import belapsed

# Time summing a pre-built random vector for several sizes and print one JSON
# object mapping size -> {"time": mean seconds per call}.
rst: dict = {}
for size in (64, 128, 256, 512, 1024, 2048):
    vec = np.random.rand(size)
    rst[size] = {"time": belapsed(lambda: vec.sum(), number=100)}

print(json.dumps(rst))
10 changes: 10 additions & 0 deletions examples/manifest/benchmark/scripts/numpy/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import timeit


def belapsed(func, *, number=None):
    """Return the mean wall-clock seconds for a single call of ``func``.

    When ``number`` is given (and truthy), ``func`` is timed exactly that many
    times; otherwise :meth:`timeit.Timer.autorange` picks the loop count
    automatically.
    """
    if not number:
        loops, total = timeit.Timer(func).autorange()
        return total / loops
    return timeit.timeit(func, number=number) / number
48 changes: 48 additions & 0 deletions examples/manifest/benchmark/summary.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Aggregate the per-task benchmark results (JSON blobs keyed by task id) into
# one DataFrame, then write reports/results.csv and reports/report.md.

# The workflow runner hands over the collected task outputs via this file.
src_file = get(ENV, "WORKFLOW_TMP_INFILE", "")
# Explicit validation instead of `@assert`: assertions may be disabled at
# higher optimization levels and should not guard external input.
isfile(src_file) || error("input file $src_file does not exist")

using JSON3
using DataFrames
using CSV
using PrettyTables

data = open(src_file) do io
    JSON3.read(io)
end

# Flatten and merge results into one single big dataframe.
# `data` maps task id => JSON string; each JSON string maps size => metrics.
dfs = map(String.(keys(data))) do tid
    X = JSON3.read(data[tid])
    map(collect(keys(X))) do sz
        d = Dict(X[sz])
        d[:size] = parse(Int, String(sz))  # JSON object keys arrive as strings
        d[:tid] = tid
        d
    end |> DataFrame
end
# Outer-join on the shared columns so tasks that report different metric sets
# (e.g. the NumPy tasks only report "time") still combine into one table.
df = reduce(dfs) do X, Y
    outerjoin(X, Y; on=intersect(names(X), names(Y)), matchmissing=:equal)
end

# Format markdown report: one section per problem size.
buffer = IOBuffer()
for df_sz in groupby(df, :size)
    println(buffer, "# size: ", df_sz[!, :size][1], "\n")

    # drop the types line provided by DataFrames
    tmp_buffer = IOBuffer()
    PrettyTables.pretty_table(
        tmp_buffer,
        df_sz;
        tf=PrettyTables.tf_markdown)
    lines = split(String(take!(tmp_buffer)), "\n")
    println(buffer, lines[1])
    foreach(l->println(buffer, l), lines[3:end])

    println(buffer)
end

# save final results
isdir("reports") || mkdir("reports")
CSV.write("reports/results.csv", df)
write("reports/report.md", take!(buffer))

0 comments on commit 9006036

Please sign in to comment.