From 9efc716dec4f4dcea5fc0091eaa3166ee61c1bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 19 Nov 2020 21:55:58 +0100 Subject: [PATCH 1/2] bench: Add EVM instructions synthetic benchmarks This introduces a set of on-demand generated EVM bytecodes available in the evmone-bench tool. Each bytecode tries to stress a single "low-level" EVM instruction. Instructions are grouped by their stack requirements and there are also two main modes of the structure of the generated bytecodes. --- test/bench/CMakeLists.txt | 1 + test/bench/bench.cpp | 2 + test/bench/synthetic_benchmarks.cpp | 277 ++++++++++++++++++++++++++++ test/bench/synthetic_benchmarks.hpp | 9 + 4 files changed, 289 insertions(+) create mode 100644 test/bench/synthetic_benchmarks.cpp create mode 100644 test/bench/synthetic_benchmarks.hpp diff --git a/test/bench/CMakeLists.txt b/test/bench/CMakeLists.txt index 875c7eb45e..eee3c10425 100644 --- a/test/bench/CMakeLists.txt +++ b/test/bench/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources( evmone-bench PRIVATE bench.cpp helpers.hpp + synthetic_benchmarks.cpp synthetic_benchmarks.hpp ) set(HAVE_STD_FILESYSTEM 0) diff --git a/test/bench/bench.cpp b/test/bench/bench.cpp index ea72861e7f..8cda55e0ca 100644 --- a/test/bench/bench.cpp +++ b/test/bench/bench.cpp @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "helpers.hpp" +#include "synthetic_benchmarks.hpp" #include #include #include @@ -287,6 +288,7 @@ int main(int argc, char** argv) registered_vms["advanced"] = evmc::VM{evmc_create_evmone(), {{"O", "2"}}}; registered_vms["baseline"] = evmc::VM{evmc_create_evmone(), {{"O", "0"}}}; register_benchmarks(benchmark_cases); + register_synthetic_benchmarks(); RunSpecifiedBenchmarks(); return 0; } diff --git a/test/bench/synthetic_benchmarks.cpp b/test/bench/synthetic_benchmarks.cpp new file mode 100644 index 0000000000..b88df2978c --- /dev/null +++ b/test/bench/synthetic_benchmarks.cpp @@ -0,0 +1,277 @@ +// evmone: Fast Ethereum 
Virtual Machine implementation +// Copyright 2020 The evmone Authors. +// SPDX-License-Identifier: Apache-2.0 + +#include "synthetic_benchmarks.hpp" +#include "helpers.hpp" +#include "test/utils/bytecode.hpp" +#include +#include + +using namespace benchmark; + +namespace evmone::test +{ +namespace +{ +/// Stack limit inside the EVM benchmark loop (one stack item is used for the loop counter). +constexpr auto stack_limit = 1023; + +enum class Mode +{ + min_stack = 0, ///< The code uses as little stack as possible. + full_stack = 1, ///< The code fills the stack up to its limit. +}; + +/// The instruction grouping by EVM stack requirements. +enum class InstructionCategory : char +{ + nop = 'n', ///< No-op instruction. + nullop = 'a', ///< Nullary operator - produces a result without any stack input. + unop = 'u', ///< Unary operator. + binop = 'b', ///< Binary operator. + push = 'p', ///< PUSH instruction. + dup = 'd', ///< DUP instruction. + swap = 's', ///< SWAP instruction. + other = 'X', ///< Not any of the categories above. 
+}; + +constexpr InstructionCategory get_instruction_category(evmc_opcode opcode) noexcept +{ + const auto trait = instr::traits[opcode]; + if (opcode >= OP_PUSH1 && opcode <= OP_PUSH32) + return InstructionCategory::push; + else if (opcode >= OP_SWAP1 && opcode <= OP_SWAP16) + return InstructionCategory::swap; + else if (opcode >= OP_DUP1 && opcode <= OP_DUP16) + return InstructionCategory::dup; + else if (trait.stack_height_required == 0 && trait.stack_height_change == 0) + return InstructionCategory::nop; + else if (trait.stack_height_required == 0 && trait.stack_height_change == 1) + return InstructionCategory::nullop; + else if (trait.stack_height_required == 1 && trait.stack_height_change == 0) + return InstructionCategory::unop; + else if (trait.stack_height_required == 2 && trait.stack_height_change == -1) + return InstructionCategory::binop; + else + return InstructionCategory::other; +} + +struct CodeParams +{ + evmc_opcode opcode; + Mode mode; +}; + +/// The less-than comparison operator. Needed for std::map. +[[maybe_unused]] inline constexpr bool operator<(const CodeParams& a, const CodeParams& b) noexcept +{ + return std::tuple(a.opcode, a.mode) < std::tuple(b.opcode, b.mode); +} + +std::string to_string(const CodeParams& params) +{ + return std::string{instr::traits[params.opcode].name} + '/' + + static_cast(get_instruction_category(params.opcode)) + + std::to_string(static_cast(params.mode)); +} + +/// Generates the EVM benchmark loop inner code for the given opcode and "mode". +bytecode generate_loop_inner_code(CodeParams params) +{ + const auto [opcode, mode] = params; + const auto category = get_instruction_category(opcode); + switch (mode) + { + case Mode::min_stack: + switch (category) + { + case InstructionCategory::nop: + // JUMPDEST JUMPDEST ... + return stack_limit * 2 * bytecode{opcode}; + + case InstructionCategory::nullop: + // CALLER POP CALLER POP ... 
+ return stack_limit * (bytecode{opcode} + OP_POP); + + case InstructionCategory::unop: + // DUP1 NOT NOT ... POP + return OP_DUP1 + stack_limit * 2 * bytecode{opcode} + OP_POP; + + case InstructionCategory::binop: + // DUP1 DUP1 ADD DUP1 ADD DUP1 ADD ... POP + return OP_DUP1 + (stack_limit - 1) * (OP_DUP1 + bytecode{opcode}) + OP_POP; + + case InstructionCategory::push: + // PUSH1 POP PUSH1 POP ... + return stack_limit * (push(opcode, {}) + OP_POP); + + case InstructionCategory::dup: + { + // The required n stack height for DUPn is provided by + // duplicating the loop counter n-1 times with DUP1. + const auto n = opcode - OP_DUP1 + 1; + // DUP1 ... DUPn POP DUPn POP ... POP ... + // \ n-1 / \ n-1 / + return (n - 1) * OP_DUP1 + // Required n stack height. + (stack_limit - (n - 1)) * // + (bytecode{opcode} + OP_POP) + // Multiple DUPn POP pairs. + (n - 1) * OP_POP; // Pop initially duplicated values. + } + + case InstructionCategory::swap: + { + // The required n+1 stack height for SWAPn is provided by duplicating the loop counter + // n times with DUP1. This also guarantees the loop counter remains unchanged because + // it is always going to be swapped to the same value. + const auto n = opcode - OP_SWAP1 + 1; + // DUP1 ... SWAPn SWAPn ... POP ... + // \ n / \ n / + return n * OP_DUP1 + // Required n+1 stack height. + stack_limit * 2 * bytecode{opcode} + // Multiple SWAPns. + n * OP_POP; // Pop initially duplicated values. + } + + default: + break; + } + break; + + case Mode::full_stack: + switch (category) + { + case InstructionCategory::nullop: + // CALLER CALLER ... POP POP ... + return stack_limit * opcode + stack_limit * OP_POP; + + case InstructionCategory::binop: + // DUP1 DUP1 DUP1 ... ADD ADD ADD ... POP + return stack_limit * OP_DUP1 + (stack_limit - 1) * opcode + OP_POP; + + case InstructionCategory::push: + // PUSH1 PUSH1 PUSH1 ... POP POP POP ... 
+ return stack_limit * push(opcode, {}) + stack_limit * OP_POP; + + case InstructionCategory::dup: + { + // The required initial n stack height for DUPn is provided by + // duplicating the loop counter n-1 times with DUP1. + const auto n = opcode - OP_DUP1 + 1; + // DUP1 ... DUPn DUPn ... POP POP ... + // \ n-1 / \ S-(n-1) / \ S / + return (n - 1) * OP_DUP1 + // Required n stack height. + (stack_limit - (n - 1)) * bytecode{opcode} + // Fill the stack with DUPn. + stack_limit * OP_POP; // Clear whole stack. + } + + default: + break; + } + break; + } + + return {}; +} + +/// Generates a benchmark loop with the given inner code. +/// +/// This generates a do-while loop with 255 iterations and it starts with PUSH1 of 255 as the loop +/// counter. The while check is done as `(counter += -1) != 0`. The SUB is avoided because it +/// consumes arguments in an unnatural order and an additional SWAP would be required. +/// +/// The loop counter stays on the stack top. The inner code is allowed to duplicate it, but must not +/// modify it. +bytecode generate_loop_v1(const bytecode& inner_code) +{ + const auto counter = push(255); + const auto jumpdest_offset = counter.size(); + return counter + OP_JUMPDEST + inner_code + // loop label + inner code + push("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") + // -1 + OP_ADD + OP_DUP1 + // counter += (-1) + push(jumpdest_offset) + OP_JUMPI; // jump to jumpdest_offset if counter != 0 +} + +/// Generates a benchmark loop with the given inner code. +/// +/// This is an improved variant of v1. It has exactly the same instructions and consumes the same +/// amount of gas, but according to the performed benchmarks (see "loop_v1" and "loop_v2") it runs +/// faster, and we want the lowest possible loop overhead. +/// The change is to set the loop counter to -255 and check `(counter += 1) != 0`. 
+bytecode generate_loop_v2(const bytecode& inner_code) +{ + const auto counter = + push("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff01"); // -255 + const auto jumpdest_offset = counter.size(); + return counter + OP_JUMPDEST + inner_code + // loop label + inner code + push(1) + OP_ADD + OP_DUP1 + // counter += 1 + push(jumpdest_offset) + OP_JUMPI; // jump to jumpdest_offset if counter != 0 +} + +bytes_view generate_code(CodeParams params) +{ + static std::map cache; + + auto& code = cache[params]; + if (!code.empty()) + return code; + + code = generate_loop_v2(generate_loop_inner_code(params)); // Cache it. + return code; +} +} // namespace + +void register_synthetic_benchmarks() +{ + std::vector params_list; + + // Nops & unops. + for (const auto opcode : {OP_JUMPDEST, OP_ISZERO, OP_NOT}) + params_list.push_back({opcode, Mode::min_stack}); + + // Binops. + for (const auto opcode : {OP_ADD, OP_MUL, OP_SUB, OP_SIGNEXTEND, OP_LT, OP_GT, OP_SLT, OP_SGT, + OP_EQ, OP_AND, OP_OR, OP_XOR, OP_BYTE, OP_SHL, OP_SHR, OP_SAR}) + params_list.insert( + params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}}); + + // Nullops. + for (const auto opcode : {OP_ADDRESS, OP_CALLER, OP_CALLVALUE, OP_CALLDATASIZE, OP_CODESIZE, + OP_RETURNDATASIZE, OP_PC, OP_MSIZE, OP_GAS}) + params_list.insert( + params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}}); + + // PUSH. + for (auto opcode = OP_PUSH1; opcode <= OP_PUSH32; opcode = static_cast(opcode + 1)) + params_list.insert( + params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}}); + + // SWAP. + for (auto opcode = OP_SWAP1; opcode <= OP_SWAP16; opcode = static_cast(opcode + 1)) + params_list.insert(params_list.end(), {{opcode, Mode::min_stack}}); + + // DUP. 
+ for (auto opcode = OP_DUP1; opcode <= OP_DUP16; opcode = static_cast(opcode + 1)) + params_list.insert( + params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}}); + + + for (auto& [vm_name, vm] : registered_vms) + { + RegisterBenchmark((std::string{vm_name} + "/execute/synth/loop_v1").c_str(), + [&vm = vm](State& state) { execute(state, vm, generate_loop_v1({})); }); + RegisterBenchmark((std::string{vm_name} + "/execute/synth/loop_v2").c_str(), + [&vm = vm](State& state) { execute(state, vm, generate_loop_v2({})); }); + } + + for (const auto params : params_list) + { + for (auto& [vm_name, vm] : registered_vms) + { + RegisterBenchmark( + (std::string{vm_name} + "/execute/synth/" + to_string(params)).c_str(), + [&vm = vm, params](State& state) { execute(state, vm, generate_code(params)); }) + ->Unit(kMicrosecond); + } + } +} +} // namespace evmone::test diff --git a/test/bench/synthetic_benchmarks.hpp b/test/bench/synthetic_benchmarks.hpp new file mode 100644 index 0000000000..0fc923a1e1 --- /dev/null +++ b/test/bench/synthetic_benchmarks.hpp @@ -0,0 +1,9 @@ +// evmone: Fast Ethereum Virtual Machine implementation +// Copyright 2020 The evmone Authors. +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +namespace evmone::test +{ +void register_synthetic_benchmarks(); +} From 0b3c2c4345176cd309a3d1782121c434698b423e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Tue, 19 Jan 2021 14:48:40 +0100 Subject: [PATCH 2/2] bench: Allow omitting DIR argument to evmone-bench --- test/bench/CMakeLists.txt | 13 +++++++------ test/bench/bench.cpp | 12 +++++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/test/bench/CMakeLists.txt b/test/bench/CMakeLists.txt index eee3c10425..ff35bc1597 100644 --- a/test/bench/CMakeLists.txt +++ b/test/bench/CMakeLists.txt @@ -47,11 +47,12 @@ set(PREFIX evmone/bench) # Check if DIR argument works. 
add_test(NAME ${PREFIX}/dir COMMAND evmone-bench ${CMAKE_CURRENT_SOURCE_DIR}/../benchmarks --benchmark_list_tests) +set_tests_properties(${PREFIX}/dir PROPERTIES PASS_REGULAR_EXPRESSION "execute/synth") -# Empty DIR name should run no benchmarks. -add_test(NAME ${PREFIX}/dirname_empty COMMAND evmone-bench "" --benchmark_list_tests) -set_tests_properties(${PREFIX}/dirname_empty PROPERTIES PASS_REGULAR_EXPRESSION "Failed to match any benchmarks") +# Omitting DIR is fine. +add_test(NAME ${PREFIX}/no_dir COMMAND evmone-bench --benchmark_list_tests) +set_tests_properties(${PREFIX}/no_dir PROPERTIES PASS_REGULAR_EXPRESSION "execute/synth") -# Missing DIR argument is an error. -add_test(NAME ${PREFIX}/no_dir COMMAND evmone-bench) -set_tests_properties(${PREFIX}/no_dir PROPERTIES PASS_REGULAR_EXPRESSION "DIR argument .* missing") +# Empty DIR name should list only built-in benchmarks. +add_test(NAME ${PREFIX}/dirname_empty COMMAND evmone-bench "" --benchmark_list_tests) +set_tests_properties(${PREFIX}/dirname_empty PROPERTIES PASS_REGULAR_EXPRESSION "execute/synth") diff --git a/test/bench/bench.cpp b/test/bench/bench.cpp index 8cda55e0ca..4a2098a6b3 100644 --- a/test/bench/bench.cpp +++ b/test/bench/bench.cpp @@ -191,12 +191,14 @@ constexpr auto cli_parsing_error = -3; /// /// The following variants of number arguments are supported (including argv[0]): /// +/// 1: evmone-bench +/// Uses evmone VMs; only synthetic benchmarks are available. /// 2: evmone-bench benchmarks_dir -/// Uses evmone VM, loads all benchmarks from benchmarks_dir. +/// Uses evmone VMs, loads all benchmarks from benchmarks_dir. /// 3: evmone-bench evmc_config benchmarks_dir -/// The same as (2) but loads custom EVMC VM. +/// The same as (2) but loads an additional custom EVMC VM. /// 4: evmone-bench code_hex_file input_hex expected_output_hex. 
-/// Uses evmone VM, registers custom benchmark with the code from the given file, +/// Uses evmone VMs, registers custom benchmark with the code from the given file, /// and the given input. The benchmark will compare the output with the provided /// expected one. std::tuple> parseargs(int argc, char** argv) @@ -211,8 +213,8 @@ std::tuple> parseargs(int argc, char** argv) switch (argc) { case 1: - std::cerr << "DIR argument (path to a directory with benchmarks) missing\n"; - return {cli_parsing_error, {}}; + // Run with built-in synthetic benchmarks only. + break; case 2: benchmarks_dir = argv[1]; break;