From 7a35839599c1b1bd89618459e7e385525f05f90a Mon Sep 17 00:00:00 2001
From: Luke Gorrie
Date: Mon, 27 Mar 2017 10:43:00 +0000
Subject: [PATCH 1/4] Import revamped nix benchmark framework

The nix code for running benchmarks is now imported in-tree. This
makes it easy to automatically run benchmarks and also generate the
fancy R visualizations. See README.md for example usage.

The visualizations are now generated as PNG files instead of an
Rmarkdown document. This is to make it easier to introduce new
visualizations, or variations of existing ones, and easier to load
large images for zooming.

Under the hood the nix code is also updated so that each test run is a
separate derivation. This means that nix is able to parallelize the
test runs (one execution of each benchmark with one raptorjit version)
and distribute them between machines. This should make the tests run
faster on the Hydra CI cluster and also avoid tying up whole servers
with hours-long derivations that run hundreds of test runs at the same
time. (It also allows you to parallelize test runs on the local
machine to use multiple cores.)
---
 README.md                   |  33 ++++++++++++
 testsuite/bench/bench.R     |  49 +++++++++++++++++
 testsuite/bench/default.nix | 103 ++++++++++++++++++++++++++++++++++++
 testsuite/bench/generate.R  |  25 +++++++++
 4 files changed, 210 insertions(+)
 create mode 100644 testsuite/bench/bench.R
 create mode 100644 testsuite/bench/default.nix
 create mode 100755 testsuite/bench/generate.R

diff --git a/README.md b/README.md
index 244264570e..2c3a954c61 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,39 @@ $ make
 ... but make sure you have at least `make`, `clang`, and `luajit` in your
 `$PATH`.
 
+### Run the benchmarks
+
+Nix can also run the full benchmark suite and generate visualizations
+with R/ggplot2.
+
+The simplest incantation tests one branch:
+
+```shell
+$ nix-build testsuite/bench --arg Asrc ./.   # note: ./. means ./
+```
+
+You can also test several branches (A-E), give them names, specify
+command-line arguments, say how many tests to run, and allow parallel
+execution:
+
+```shell
+$ nix-build testsuite/bench \
+            --arg    Asrc ~/git/raptorjit \
+            --argstr Aname master \
+            --arg    Bsrc ~/git/raptorjit-hack \
+            --argstr Bname hacked \
+            --arg    Csrc ~/git/raptorjit-hack2 \
+            --argstr Cname hacked-O1 \
+            --argstr Cargs -O1 \
+            --arg    runs 100 \
+            -j 5           # Run up to 5 tests in parallel
+''
+
+If you are using a distributed nix environment such
+as [Hydra](https://nixos.org/hydra/) then the tests can be
+automatically parallelized and distributed across a suitable build
+farm.
+
 ### Quotes
 
 Here are some borrowed words to put this branch into context:
diff --git a/testsuite/bench/bench.R b/testsuite/bench/bench.R
new file mode 100644
index 0000000000..e76382bf15
--- /dev/null
+++ b/testsuite/bench/bench.R
@@ -0,0 +1,49 @@
+# R subroutines for reading and visualizing benchmark results.
+
+suppressPackageStartupMessages({
+  library(dplyr)
+  library(ggplot2)
+})
+
+## Read a benchmark CSV and add a 'relative' column (baseline mean cycles / cycles)
+bench.read <- function(filename) {
+  data <- read.csv(filename)
+  ## baseline is the mean cycle count of the letter "A" rows, per benchmark
+  baseline <- data %>%
+    filter(letter=="A") %>%
+    group_by(benchmark) %>%
+    summarize(baseline = mean(cycles))
+  ## Add 'relative' column: baseline mean / cycles (fewer cycles => larger value)
+  relative <- data %>%
+    left_join(baseline, by="benchmark") %>%
+    group_by(benchmark, version) %>%
+    mutate(relative = first(baseline) / cycles)
+  return(relative)
+}
+
+## Jitter plot of relative performance per version, faceted by benchmark
+bench.jitterplot <- function(data) {
+  ggplot(aes(y=relative, x=version, color=version), data=data) +
+    geom_jitter(shape=1, alpha=0.5) +
+    scale_y_continuous(breaks=seq(0, 3, 0.1), labels=scales::percent) +
+    theme(aspect.ratio = 1) +
+    theme(axis.text.x = element_text(angle=90)) +
+    ylab("Performance relative to baseline average") +
+    ggtitle("Comparative performance between RaptorJIT versions") +
+    facet_wrap(~ benchmark, scales="free_x")
+}
+
+## ECDF plot faceted by benchmark (x: relative performance, y: cumulative share)
+bench.ecdfplot <- function(data) {
+  ggplot(aes(x=relative, color=version), data=data) +
+    stat_ecdf() +
+    scale_x_continuous(labels=scales::percent) +
+    scale_y_log10(labels=scales::percent) +
+    theme(aspect.ratio = 1) +
+    theme(axis.text.x = element_text(angle=90)) +
+    xlab("Performance relative to baseline average") +
+    ylab("Percentage of results at or below this performance level") +
+    ggtitle("Comparative performance between RaptorJIT variants") +
+    facet_wrap(~ benchmark)
+}
+
diff --git a/testsuite/bench/default.nix b/testsuite/bench/default.nix
new file mode 100644
index 0000000000..8d95a071d2
--- /dev/null
+++ b/testsuite/bench/default.nix
@@ -0,0 +1,103 @@
+# Run a large parallel benchmark campaign and generate R/ggplot2 reports.
+
+{ pkgs ? (import ../../pkgs.nix) {},
+  Asrc, Aname ? "A", Aargs ? "",
+  Bsrc ? null, Bname ? "B", Bargs ? "",
+  Csrc ? null, Cname ? "C", Cargs ? "",
+  Dsrc ? null, Dname ? "D", Dargs ? "",
+  Esrc ? null, Ename ? "E", Eargs ? "",
+  hardware ? null,
+  runs ? 30 }:
+
+with pkgs;
+with stdenv;
+
+# Derivation to run benchmarks and produce a CSV result.
+let benchmark = letter: name: src: args: run:
+  let raptorjit = (import src {inherit pkgs; version = name;}).raptorjit; in
+  mkDerivation {
+    name = "benchmark-${name}-${toString run}";
+    src = pkgs.lib.cleanSource ./.;
+    # Pin builds to machines advertising the 'hardware' feature, when one is given
+    requiredSystemFeatures = if hardware != null then [hardware] else [];
+    buildInputs = [ raptorjit linuxPackages.perf utillinux ];
+    buildPhase = ''
+      # One run per derivation; perf logs go straight into result/ (installPhase globs result/*.perf)
+      echo "Run ${toString run}"
+      mkdir -p result
+      # Run each individual benchmark
+      cat PARAM_x86_CI.txt |
+        (while read benchmark params; do
+           echo "running $benchmark"
+           # Execute with performance monitoring & time supervision
+           # Note: discard stdout due to overwhelming output; drop the perf log if the run fails
+           timeout -sKILL 60 \
+             perf stat -x, -o result/$benchmark.perf \
+             raptorjit ${args} -e "math.randomseed(${toString run})" $benchmark.lua $params \
+                > /dev/null || \
+                rm result/$benchmark.perf
+         done)
+    '';
+    installPhase = ''
+      # Copy the raw perf output for reference
+      cp -r result $out
+      # Log the exact CPU
+      lscpu > $out/cpu.txt
+      # Create a CSV file (no header row; columns: letter,version,benchmark,run,instructions,cycles)
+      # Create the rows based on the perf logs
+      for result in result/*.perf; do
+        version=${name}
+        benchmark=$(basename -s.perf -a $result)
+        instructions=$(awk -F, -e '$3 == "instructions" { print $1; }' $result)
+        cycles=$(      awk -F, -e '$3 == "cycles"       { print $1; }' $result)
+        echo ${letter},$version,$benchmark,${toString run},$instructions,$cycles >> $out/bench.csv
+      done
+    '';
+  };
+
+# Run a set of benchmarks and aggregate the results into a CSV file.
+# Each benchmark run is a separate derivation. This allows nix to
+# parallelize and distribute the benchmarking.
+  benchmarkSet = letter: name: src: args:
+    let benchmarks = map (benchmark letter name src args) (pkgs.lib.range 1 runs);
+    in
+      runCommand "benchmarks-${name}" { buildInputs = benchmarks; } ''
+        source $stdenv/setup
+        mkdir -p $out
+        for dir in ${pkgs.lib.fold (x: acc: "${x} ${acc}") "" benchmarks}; do
+          cat $dir/bench.csv >> $out/bench.csv
+        done
+      '';
+
+  benchA = (benchmarkSet "A" Aname Asrc Aargs);
+  benchB = if Bsrc != null then (benchmarkSet "B" Bname Bsrc Bargs) else "";
+  benchC = if Csrc != null then (benchmarkSet "C" Cname Csrc Cargs) else "";
+  benchD = if Dsrc != null then (benchmarkSet "D" Dname Dsrc Dargs) else "";
+  benchE = if Esrc != null then (benchmarkSet "E" Ename Esrc Eargs) else "";
+in
+
+rec {
+  benchmarkResults = mkDerivation {
+    name = "benchmark-results";
+    buildInputs = with pkgs.rPackages; [ pkgs.R ggplot2 dplyr ];
+    builder = pkgs.writeText "builder.sh" ''
+      source $stdenv/setup
+      # Merge the per-version CSV files under one header row (unset versions are "" and skipped)
+      mkdir -p $out/nix-support
+      echo "letter,version,benchmark,run,instructions,cycles" > bench.csv
+      cat ${benchA}/bench.csv >> bench.csv
+      [ -n "${benchB}" ] && cat ${benchB}/bench.csv >> bench.csv
+      [ -n "${benchC}" ] && cat ${benchC}/bench.csv >> bench.csv
+      [ -n "${benchD}" ] && cat ${benchD}/bench.csv >> bench.csv
+      [ -n "${benchE}" ] && cat ${benchE}/bench.csv >> bench.csv
+      cp bench.csv $out
+      echo "file CSV $out/bench.csv" >> $out/nix-support/hydra-build-products
+      # Generate the report (run from this directory so generate.R can source bench.R)
+      (cd ${./.}; Rscript ./generate.R $out/bench.csv $out)
+      for png in $out/*.png; do
+        echo "file PNG $png" >> $out/nix-support/hydra-build-products
+      done
+    '';
+  };
+}
+
diff --git a/testsuite/bench/generate.R b/testsuite/bench/generate.R
new file mode 100755
index 0000000000..fb43f01b1f
--- /dev/null
+++ b/testsuite/bench/generate.R
@@ -0,0 +1,25 @@
+#!/usr/bin/env nix-shell
+#!nix-shell -i Rscript -p R rPackages.dplyr rPackages.ggplot2
+
+# R command-line program for making visualizations from benchmark results.
+
+suppressWarnings(source("bench.R"))
+
+args <- commandArgs(trailingOnly=TRUE)
+if (length(args) != 2) {
+  message("Usage: generate.R <csvfile> <outdir>"); quit(status=1)
+}
+
+filename <- args[[1]]
+outdir   <- args[[2]]
+
+data <- bench.read(filename)
+if (!dir.exists(outdir)) { dir.create(outdir, recursive=TRUE) }
+
+ggsave(filename = file.path(outdir,"bench-jitter.png"),
+       plot = bench.jitterplot(data),
+       width=12, height=12)
+
+ggsave(filename = file.path(outdir,"bench-ecdf.png"),
+       plot = bench.ecdfplot(data),
+       width=12, height=12)

From b1b52db19b38e4b86ce77dcae9a734e3f77f148e Mon Sep 17 00:00:00 2001
From: Luke Gorrie
Date: Mon, 27 Mar 2017 11:00:18 +0000
Subject: [PATCH 2/4] Fix markdown goof in README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2c3a954c61..daf8fa972c 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,7 @@ $ nix-build testsuite/bench \
             --argstr Cargs -O1 \
             --arg    runs 100 \
             -j 5           # Run up to 5 tests in parallel
-''
+```
 
 If you are using a distributed nix environment such
 as [Hydra](https://nixos.org/hydra/) then the tests can be

From f5aa32905a4da78be60d581ca0e12e381856278b Mon Sep 17 00:00:00 2001
From: Luke Gorrie
Date: Mon, 27 Mar 2017 11:02:37 +0000
Subject: [PATCH 3/4] README.md: Add note about where benchmark visualizations
 go

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index daf8fa972c..3d81927184 100644
--- a/README.md
+++ b/README.md
@@ -132,6 +132,7 @@ command-line arguments, say how many tests to run, and allow parallel
 execution:
 
 ```shell
+# Run the benchmarks and create result visualizations in result/
 $ nix-build testsuite/bench \
             --arg    Asrc ~/git/raptorjit \
             --argstr Aname master \

From 43181637fb66dab6d4286be3b4c511cbecf4cfed Mon Sep 17 00:00:00 2001
From: Luke Gorrie
Date: Fri, 24 Mar 2017 19:41:23 +0000
Subject: [PATCH 4/4] testsuite/bench: Tweaked tests to run longer

Tweaked parameters so that shorter tests run for longer, typically 2-3
seconds each. This is to reduce sensitivity to potential disturbance of
results by small events during startup e.g. slow system calls.
---
 testsuite/bench/PARAM_x86_CI.txt | 13 ++++++-------
 testsuite/bench/life.lua         |  2 +-
 testsuite/bench/roulette.lua     |  2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/testsuite/bench/PARAM_x86_CI.txt b/testsuite/bench/PARAM_x86_CI.txt
index 0a90016863..5f8752c23b 100644
--- a/testsuite/bench/PARAM_x86_CI.txt
+++ b/testsuite/bench/PARAM_x86_CI.txt
@@ -1,22 +1,21 @@
-array3d 300
+array3d 500
 binary-trees 16
 chameneos 1e7
-coroutine-ring 2e7
+coroutine-ring 5e7
 euler14-bit 2e7
 fannkuch 11
 fasta 5e6
 life
 mandelbrot 5000
 mandelbrot-bit 5000
-md5 20000
-nbody 5e6
+md5 30000
+nbody 8e6
 nsieve 12
-nsieve-bit 12
+nsieve-bit 13
 nsieve-bit-fp 12
-partialsums 1e7
+partialsums 3e7
 pidigits-nogmp 5000
 ray 9
-recursive-ack 10
 recursive-fib 40
 scimark-fft 50000
 scimark-lu 5000
diff --git a/testsuite/bench/life.lua b/testsuite/bench/life.lua
index 911d9fe177..4b7029dac6 100644
--- a/testsuite/bench/life.lua
+++ b/testsuite/bench/life.lua
@@ -103,7 +103,7 @@ function LIFE(w,h)
     thisgen:draw()
     write("Life - generation ",gen,"\n")
     gen=gen+1
-    if gen>2000 then break end
+    if gen>10000 then break end
     --delay() -- no delay
   end
 end
diff --git a/testsuite/bench/roulette.lua b/testsuite/bench/roulette.lua
index 968d42a744..84afe8fdd6 100644
--- a/testsuite/bench/roulette.lua
+++ b/testsuite/bench/roulette.lua
@@ -6,7 +6,7 @@
 -- (Let the test harness determine the random seed)
 -- math.randomseed(os.time())
 
-local population = 100e6
+local population = 200e6
 local live = 0
 local die = 0