Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial manifest workflow specification #8

Merged
merged 7 commits into from
Feb 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/UnitTest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
julia-version: ['1.0', '1', 'nightly']
julia-version: ['1.6', '1', 'nightly']
os: [ubuntu-latest]
arch: [x64]
include:
Expand Down
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
.vscode/

*.jl.*.cov
*.jl.cov
*.jl.mem
Manifest.toml
/docs/build/

__pycache__/
.mypy_cache/

.workflows
15 changes: 13 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,22 @@ uuid = "115008b9-7a42-4cba-af26-8bebb992e909"
authors = ["Johnny Chen <[email protected]>"]
version = "0.1.0"

[deps]
Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"

[compat]
julia = "1"
Configurations = "0.17"
JSON3 = "1"
TOML = "1"
julia = "1.6"

[extras]
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["Suppressor", "Test"]
2 changes: 2 additions & 0 deletions dialects.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[manifest]
version = "0.1.0"
1 change: 1 addition & 0 deletions examples/manifest/benchmark/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
reports/
8 changes: 8 additions & 0 deletions examples/manifest/benchmark/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
Workflows = "115008b9-7a42-4cba-af26-8bebb992e909"
6 changes: 6 additions & 0 deletions examples/manifest/benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmark example with the manifest dialect

This demo benchmarks Julia against NumPy on a few functions (`sum`, `rand` and `randn`)
using the scripts in `scripts`.

Package `numpy` is required for the python executable.
58 changes: 58 additions & 0 deletions examples/manifest/benchmark/benchmark.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
version = "0.1"
dialect = "manifest"

order = [["1", "2", "3", "4"], ["5"]]

[[tasks]]
name = "sum"
id = "1"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Julia"]
deps = ["scripts/julia"]
runner = "juliamodule"
[tasks.run]
script = "scripts/julia/sum.jl"

[[tasks]]
name = "rand"
id = "2"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Julia"]
deps = ["scripts/julia"]
runner = "juliamodule"
[tasks.run]
script = "scripts/julia/rand.jl"

[[tasks]]
name = "sum"
id = "3"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Numpy"]
deps = ["scripts/numpy"]
runner = "shell"
[tasks.run]
command = "python scripts/numpy/sum.py"
capture = true

[[tasks]]
name = "randn"
id = "4"
groups = ["Type:LinearAlgebra", "Type:Benchmark", "Framework:Numpy"]
deps = ["scripts/numpy"]
runner = "shell"
[tasks.run]
command = "python scripts/numpy/randn.py"
capture = true

[[tasks]]
name = "summary"
id = "5"
groups = ["Type:LinearAlgebra", "Type:Benchmark"]
deps = [
"summary.jl",
"@__STDOUT__1",
"@__STDOUT__2",
"@__STDOUT__3",
"@__STDOUT__4",
]
outs = ["reports"]
runner = "shell"
[tasks.run]
command = "julia --startup=no --project=. summary.jl"
12 changes: 12 additions & 0 deletions examples/manifest/benchmark/scripts/julia/rand.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using LinearAlgebra
using BenchmarkTools
using JSON3
include(joinpath(@__DIR__, "utils.jl"))

# Benchmark `rand(len)` for every vector length and keep one summary per length.
rst = Dict()
for len in (64, 128, 256, 512, 1024, 2048)
    trial = @benchmark rand($len) samples=100 evals=1
    rst[len] = trial_to_dict(trial)
end

# Last expression of the script: the collected results serialized as JSON text.
JSON3.write(rst)
13 changes: 13 additions & 0 deletions examples/manifest/benchmark/scripts/julia/sum.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using LinearAlgebra
using BenchmarkTools
using JSON3
include(joinpath(@__DIR__, "utils.jl"))

# Benchmark `sum` over a random vector of every length and keep one summary per length.
rst = Dict()
for len in (64, 128, 256, 512, 1024, 2048, 4096, 8192)
    # the input is created outside the benchmark so only `sum` is measured
    data = rand(len)
    trial = @benchmark sum($data) samples=100 evals=1
    rst[len] = trial_to_dict(trial)
end

# Last expression of the script: the collected results serialized as JSON text.
JSON3.write(rst)
10 changes: 10 additions & 0 deletions examples/manifest/benchmark/scripts/julia/utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using BenchmarkTools

"""
    trial_to_dict(trial::BenchmarkTools.Trial)

Summarize `trial` as a `Dict{String,Float64}` with four entries: `"time"` and
`"gctimes"` hold the means of `trial.times` and `trial.gctimes`, while `"allocs"` and
`"memory"` hold the corresponding fields of `trial` converted to `Float64`.
"""
function trial_to_dict(trial::BenchmarkTools.Trial)
    return Dict{String,Float64}(
        "time" => mean(trial.times),
        "gctimes" => mean(trial.gctimes),
        "allocs" => trial.allocs,
        "memory" => trial.memory,
    )
end
9 changes: 9 additions & 0 deletions examples/manifest/benchmark/scripts/numpy/randn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import numpy as np
import json
from utils import belapsed

# Average time of one `np.random.randn(n)` call (over 100 calls), keyed by length n.
sizes = [64, 128, 256, 512, 1024, 2048]
rst: dict = {
    n: {"time": belapsed(lambda: np.random.randn(n), number=100)} for n in sizes
}

# Emit the results as a single JSON document on stdout.
print(json.dumps(rst))
10 changes: 10 additions & 0 deletions examples/manifest/benchmark/scripts/numpy/sum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import numpy as np
import json
from utils import belapsed

# Average time of one `x.sum()` call (over 100 calls), keyed by vector length.
rst: dict = {}
for size in (64, 128, 256, 512, 1024, 2048):
    # the input vector is created outside the timed callable
    vec = np.random.rand(size)
    rst[size] = {"time": belapsed(lambda: vec.sum(), number=100)}

# Emit the results as a single JSON document on stdout.
print(json.dumps(rst))
10 changes: 10 additions & 0 deletions examples/manifest/benchmark/scripts/numpy/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import timeit


def belapsed(func, *, number=None):
    """Return the average time in seconds of a single ``func()`` call.

    When ``number`` is truthy, ``func`` is run exactly ``number`` times with
    ``timeit.timeit``. Otherwise (``None`` or ``0``) the loop count is chosen by
    ``timeit.Timer.autorange``.
    """
    if not number:
        loops, total = timeit.Timer(func).autorange()
        return total / loops
    return timeit.timeit(func, number=number) / number
48 changes: 48 additions & 0 deletions examples/manifest/benchmark/summary.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Merge the captured outputs of the benchmark tasks into one table and write the
# reports. The path of the JSON input file is read from `WORKFLOW_TMP_INFILE`.
src_file = get(ENV, "WORKFLOW_TMP_INFILE", "")
# Validate explicitly instead of using `@assert`: assertions are meant for internal
# invariants and may be disabled, while a missing input file is a usage error.
isfile(src_file) ||
    error("input file \"$src_file\" (from ENV[\"WORKFLOW_TMP_INFILE\"]) does not exist")

using JSON3
using DataFrames
using CSV
using PrettyTables

# Top-level object keyed by task id; every value is itself JSON text (parsed below).
data = open(src_file) do io
    JSON3.read(io)
end

# flatten and merge results into one single big dataframe
dfs = map(String.(keys(data))) do tid
    X = JSON3.read(data[tid])
    # one row per problem size, tagged with the size and the task it came from
    rows = map(collect(keys(X))) do sz
        d = Dict(X[sz])
        d[:size] = parse(Int, String(sz))
        d[:tid] = tid
        d
    end
    DataFrame(rows)
end
# join on whatever columns two tables share, so tasks may report different metrics
df = reduce(dfs) do X, Y
    outerjoin(X, Y; on=intersect(names(X), names(Y)), matchmissing=:equal)
end

# format markdown reports
buffer = IOBuffer()
for df_sz in groupby(df, :size)
    println(buffer, "# size: ", df_sz[!, :size][1], "\n")

    # drop the types line provided by DataFrames
    tmp_buffer = IOBuffer()
    PrettyTables.pretty_table(
        tmp_buffer,
        df_sz;
        tf=PrettyTables.tf_markdown)
    lines = split(String(take!(tmp_buffer)), "\n")
    println(buffer, lines[1])
    foreach(l->println(buffer, l), lines[3:end])

    println(buffer)
end

# save final results
# `mkpath` is a no-op when the directory already exists
mkpath("reports")
CSV.write("reports/results.csv", df)
write("reports/report.md", take!(buffer))
12 changes: 11 additions & 1 deletion src/Workflows.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
"""
    Workflows

Top-level package module. It loads the `Dialects` and `Runners` submodules and the
scheduler code from `scheduler.jl`.
"""
module Workflows

using Configurations
using SHA
using JSON3

# `Dialects` has to be included before the names below can be imported from it.
include("dialects/Dialects.jl")
using .Dialects: load_config, save_config
using .Dialects: AbstractWorkflow, ManifestWorkflow
using .Dialects: task_id, task_deps
# `Runners` provides `execute_task`.
include("runners/Runners.jl")
using .Runners: execute_task
include("scheduler.jl")

end
24 changes: 24 additions & 0 deletions src/dialects/Dialects.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
    Dialects

Workflow dialect support: the abstract task/workflow/execution-order types, the spec
version table read from `dialects.toml`, and the code included from `traits.jl`,
`orders.jl`, `manifest.jl`, `utils.jl` and `config_io.jl`.
"""
module Dialects

using Configurations
using TOML
using Printf

import Configurations: from_dict, to_dict

# Spec version of every dialect listed in `dialects.toml`, keyed by dialect name.
# A `let` block keeps the temporary path binding out of the module namespace.
const spec_versions = let path = joinpath(@__DIR__, "..", "..", "dialects.toml")
    # The file is read while the module is being loaded/precompiled; registering it as
    # a dependency makes a change to it invalidate the precompile cache.
    include_dependency(path)
    Dict(k=>VersionNumber(v["version"]) for (k, v) in TOML.parsefile(path))
end

abstract type AbstractTask end
abstract type AbstractWorkflow end
abstract type AbstractExecutionOrder end

include("traits.jl")
include("orders.jl")
include("manifest.jl")
include("utils.jl")
include("config_io.jl")

end #module
54 changes: 54 additions & 0 deletions src/dialects/config_io.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
load_config(filename::AbstractString)

Load workflow configuration from `filename`.
"""
function load_config(filename::AbstractString)
    ext = last(splitext(basename(filename)))
    # https://toml.io/en/v1.0.0#filename-extension
    # TOML files should use the extension `.toml`; no other format is supported
    ext == ".toml" || throw(ArgumentError("unsupported file extension: \"$ext\"."))
    config = TOML.parsefile(filename)

    # both keys are required
    dialect = config["dialect"]
    ver = config["version"]
    check_version(dialect, VersionNumber(ver))

    # runtime dispatch to custom dialect implementation
    return load_config(Val(Symbol(dialect)), config)
end

# fallback for dialects that do not provide their own `load_config` method
function load_config(::Val{d}, config) where {d}
    return error("unsupported workflow dialect \"$d\".")
end

"""
save_config(filename::AbstractString, workflow)

Save workflow configuration into `filename`.
"""
function save_config(filename::AbstractString, workflow::AbstractWorkflow)
    ext = last(splitext(basename(filename)))
    # only TOML output is supported
    ext == ".toml" || throw(ArgumentError("unsupported file extension: \"$ext\"."))

    config = to_dict(workflow, TOMLStyle)
    # the stored version is always the spec version of the workflow's dialect
    config["version"] = string(spec_versions[workflow.dialect])
    open(filename, "w") do io
        TOML.print(convert_to_builtin, io, config)
    end
    return nothing
end

# Custom types have to be turned into built-in values before serialization;
# every other value is passed through unchanged.
function convert_to_builtin(p::PipelineOrder)
    return p.stages
end
function convert_to_builtin(v::VersionNumber)
    return string(v)
end
function convert_to_builtin(v)
    return v
end

# TOML support: serialize any `AbstractWorkflow` through `convert_to_builtin`,
# for both the `IO` and the file name entry points.
Configurations.to_toml(io::IO, x::AbstractWorkflow; kwargs...) =
    to_toml(convert_to_builtin, io, x; kwargs...)
Configurations.to_toml(filename::String, x::AbstractWorkflow; kwargs...) =
    to_toml(convert_to_builtin, filename, x; kwargs...)
Loading