diff --git a/tests/bench_periph_gpio_ll/.gitignore b/tests/bench_periph_gpio_ll/.gitignore
new file mode 100644
index 000000000000..e97288e9c5eb
--- /dev/null
+++ b/tests/bench_periph_gpio_ll/.gitignore
@@ -0,0 +1,5 @@
+# Allow custom pin mapping in Makefile.$(BOARD) files, but those don't need to
+# go upstream
+/Makefile.*
+# but un-ignore Makefile.ci
+!/Makefile.ci
diff --git a/tests/bench_periph_gpio_ll/Makefile b/tests/bench_periph_gpio_ll/Makefile
new file mode 100644
index 000000000000..d37ad224f441
--- /dev/null
+++ b/tests/bench_periph_gpio_ll/Makefile
@@ -0,0 +1,42 @@
+BOARD ?= nucleo-f767zi
+
+# Custom per-board pin configuration (e.g. for setting PORT_OUT, PIN_OUT_0, ...)
+# can be provided in a Makefile.$(BOARD) file:
+-include Makefile.$(BOARD)
+
+# Choose two output pins that do not conflict with stdio and are not connected
+# to external devices such as sensors, network devices, etc.
+#
+# Beware: If other pins on the output port are configured as output GPIOs, they
+# might be written to during this test.
+PORT_OUT ?= 0
+PIN_OUT_0 ?= 0
+PIN_OUT_1 ?= 1
+
+include ../Makefile.tests_common
+
+FEATURES_REQUIRED += periph_gpio_ll
+FEATURES_REQUIRED += periph_gpio
+FEATURES_OPTIONAL += periph_gpio_ll_irq
+FEATURES_OPTIONAL += periph_gpio_ll_irq_level_triggered_high
+FEATURES_OPTIONAL += periph_gpio_ll_irq_level_triggered_low
+
+USEMODULE += ztimer_usec
+
+include $(RIOTBASE)/Makefile.include
+
+# Configure if compensation of loop overhead in the estimation of the
+# toggling speed should be performed. Default: Do so, except for Cortex-M7.
+# For the Cortex-M7 the loop instructions are emitted together with the GPIO
+# writes due to the dual issue feature. Hence, there is no loop overhead for
+# Cortex-M7 to compensate for.
+ifeq (cortex-m7,$(CPU_CORE))
+  COMPENSATE_OVERHEAD ?= 0
+endif
+
+COMPENSATE_OVERHEAD ?= 1
+
+CFLAGS += -DPORT_OUT=$(PORT_OUT)
+CFLAGS += -DPIN_OUT_0=$(PIN_OUT_0)
+CFLAGS += -DPIN_OUT_1=$(PIN_OUT_1)
+CFLAGS += -DCOMPENSATE_OVERHEAD=$(COMPENSATE_OVERHEAD)
diff --git a/tests/bench_periph_gpio_ll/Makefile.ci b/tests/bench_periph_gpio_ll/Makefile.ci
new file mode 100644
index 000000000000..69c36480c64d
--- /dev/null
+++ b/tests/bench_periph_gpio_ll/Makefile.ci
@@ -0,0 +1,8 @@
+BOARD_INSUFFICIENT_MEMORY := \
+  arduino-duemilanove \
+  arduino-nano \
+  arduino-uno \
+  atmega328p \
+  atmega328p-xplained-mini \
+  nucleo-l011k4 \
+  #
diff --git a/tests/bench_periph_gpio_ll/README.md b/tests/bench_periph_gpio_ll/README.md
new file mode 100644
index 000000000000..72d9afa20c37
--- /dev/null
+++ b/tests/bench_periph_gpio_ll/README.md
@@ -0,0 +1,40 @@
+# Benchmark for `periph/gpio_ll`
+
+This application will generate a square wave on two output pins with a phase
+difference of zero between them using both the `periph/gpio` API (as reference)
+and the `periph/gpio_ll` API. You can use a logic analyzer or scope to verify
+that the square waves are indeed simultaneous (no phase difference) and to check
+their frequency. Note that with the pin based `periph/gpio` API a phase difference is
+expected, but not for the port based `periph/gpio_ll` API.
+
+In addition, a timer is used to measure the average frequency over 50,000
+square wave periods. The overhead of the loop is estimated and a compensated
+frequency (that would be achievable only by unrolling the loop) is calculated.
+Both frequencies are printed, in addition to the number of CPU cycles per wave
+period. The optimal value is 2 CPU cycles (signal is 1 cycle high and 1 cycle
+low).
+
+## Configuration
+
+Configure in the `Makefile` or set via environment variables the number of
+the GPIO port to use via the `PORT_OUT` variable. The `PIN_OUT_0` and
+`PIN_OUT_1` variables select the pins to use within that GPIO port. If possible,
+choose a GPIO port that is fully broken out to pins of your board but left
+unconnected. That way you can connect a scope or a logic analyzer to verify
+the output.
+
+Note that the test using `gpio_ll_write()` might cause changes to unrelated pins
+on the `PORT_OUT` GPIO port, by restoring their value to what it was at the
+beginning of the benchmark.
+
+## FAQ
+
+Why are 4 function calls used for `periph/gpio`, but only 2 for
+`periph/gpio_ll`? This isn't fair!
+
+Since in a port based API multiple pins can be accessed at once, only two
+accesses are needed (one for the high and one for the low part of each square
+wave period). In the pin based `periph/gpio` API, two accesses are needed per
+pin. This unfair advantage in speed is one of the reasons we want a low level
+port based API in RIOT - in addition to a more convenient to use and high level
+pin based API.
diff --git a/tests/bench_periph_gpio_ll/main.c b/tests/bench_periph_gpio_ll/main.c
new file mode 100644
index 000000000000..17c399985868
--- /dev/null
+++ b/tests/bench_periph_gpio_ll/main.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2021 Otto-von-Guericke-Universität Magdeburg
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */ + +/** + * @ingroup tests + * @{ + * + * @file + * @brief Test application for the Peripheral GPIO Low-Level API + * + * @author Marian Buschsieweke + * + * @} + */ + +#include +#include +#include +#include + +#include "mutex.h" +#include "periph/gpio.h" +#include "periph/gpio_ll.h" +#include "test_utils/expect.h" +#include "ztimer.h" +#include "timex.h" + +#ifndef COMPENSATE_OVERHEAD +#define COMPENSATE_OVERHEAD 1 +#endif + +static gpio_port_t port_out = GPIO_PORT(PORT_OUT); + +static void print_summary_compensated(uint_fast16_t loops, uint32_t duration, + uint32_t duration_uncompensated) +{ + printf("%" PRIuFAST16 " iterations took %" PRIu32 " us " + "(%" PRIu32 " us uncompensated)\n", + loops, duration, duration_uncompensated); + printf("Two square waves pins at %12" PRIu32 " Hz " + "(%12" PRIu32 " Hz uncompensated)\n", + (uint32_t)((uint64_t)US_PER_SEC * loops / duration), + (uint32_t)((uint64_t)US_PER_SEC * loops / duration_uncompensated)); +#ifdef CLOCK_CORECLOCK + uint64_t divisor = (uint64_t)US_PER_SEC * loops / CLOCK_CORECLOCK; + uint32_t cycles = (duration + divisor / 2) / divisor; + uint32_t cycles_uncompensated = (duration_uncompensated + divisor / 2) + / divisor; + printf("~%" PRIu32 " CPU cycles per square wave period " + "(~%" PRIu32 " cycles uncompensated)\n", + cycles, cycles_uncompensated); + if (cycles <= 2) { + puts(":-D"); + } + else if (cycles <= 4) { + puts(":-)"); + } + else if (cycles <= 8) { + puts(":-|"); + } + else if (cycles <= 16) { + puts(":-("); + } + else { + puts(":'-("); + } +#endif +} + +static void print_summary_uncompensated(uint_fast16_t loops, uint32_t duration) +{ + printf("%" PRIuFAST16 " iterations took %" PRIu32 " us\n", + loops, duration); + printf("Two square waves pins at %12" PRIu32 " Hz\n", + (uint32_t)((uint64_t)US_PER_SEC * loops / duration)); +#ifdef CLOCK_CORECLOCK + uint64_t divisor = (uint64_t)US_PER_SEC * loops / CLOCK_CORECLOCK; + uint32_t cycles = (duration + divisor / 2) / divisor; + printf("~%" PRIu32 
" CPU cycles per square wave period\n", cycles); + if (cycles <= 2) { + puts(":-D"); + } + else if (cycles <= 4) { + puts(":-)"); + } + else if (cycles <= 8) { + puts(":-|"); + } + else if (cycles <= 16) { + puts(":-("); + } + else { + puts(":'-("); + } +#endif +} + +int main(void) +{ + static const uint_fast16_t loops = 50000; + uint32_t loop_overhead = 0; + + uword_t mask_both = (1U << PIN_OUT_0) | (1U << PIN_OUT_1); + + puts("\n" + "Benchmarking GPIO APIs\n" + "======================"); + + if (COMPENSATE_OVERHEAD) { + puts("\n" + "estimating loop overhead for compensation\n" + "-----------------------------------------"); + uint32_t start = ztimer_now(ZTIMER_USEC); + for (uint_fast16_t i = loops; i > 0; i--) { + __asm__ volatile ("" : : : ); + } + loop_overhead = ztimer_now(ZTIMER_USEC) - start; + printf("%" PRIu32 " us for %" PRIuFAST16 " iterations\n", + loop_overhead, loops); + } + + { + puts("\n" + "periph/gpio: Using 2x gpio_set() and 2x gpio_clear()\n" + "---------------------------------------------------"); + gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0); + gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1); + gpio_init(p0, GPIO_OUT); + gpio_init(p1, GPIO_OUT); + + uint32_t start = ztimer_now(ZTIMER_USEC); + for (uint_fast16_t i = loops; i > 0; i--) { + gpio_set(p0); + gpio_set(p1); + gpio_clear(p0); + gpio_clear(p1); + } + uint32_t duration = ztimer_now(ZTIMER_USEC) - start; + + if (COMPENSATE_OVERHEAD) { + print_summary_compensated(loops, duration - loop_overhead, + duration); + } + else { + print_summary_uncompensated(loops, duration); + } + } + + { + puts("\n" + "periph/gpio_ll: Using gpio_ll_set() and gpio_ll_clear()\n" + "-------------------------------------------------------"); + gpio_conf_t conf = { + .state = GPIO_OUTPUT_PUSH_PULL, + .slew_rate = GPIO_SLEW_FASTEST + }; + expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf)); + expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf)); + + uint32_t start = ztimer_now(ZTIMER_USEC); + for (uint_fast16_t i = 
loops; i > 0; i--) { + gpio_ll_set(port_out, (1UL << PIN_OUT_0) | (1UL << PIN_OUT_1)); + gpio_ll_clear(port_out, (1UL << PIN_OUT_0) | (1UL << PIN_OUT_1)); + } + uint32_t duration = ztimer_now(ZTIMER_USEC) - start; + + if (COMPENSATE_OVERHEAD) { + print_summary_compensated(loops, duration - loop_overhead, + duration); + } + else { + print_summary_uncompensated(loops, duration); + } + } + + { + puts("\n" + "periph/gpio: Using 4x gpio_toggle()\n" + "-----------------------------------"); + gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0); + gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1); + gpio_init(p0, GPIO_OUT); + gpio_init(p1, GPIO_OUT); + + uint32_t start = ztimer_now(ZTIMER_USEC); + for (uint_fast16_t i = loops; i > 0; i--) { + gpio_toggle(p0); + gpio_toggle(p1); + gpio_toggle(p0); + gpio_toggle(p1); + } + uint32_t duration = ztimer_now(ZTIMER_USEC) - start; + + if (COMPENSATE_OVERHEAD) { + print_summary_compensated(loops, duration - loop_overhead, + duration); + } + else { + print_summary_uncompensated(loops, duration); + } + } + + { + puts("\n" + "periph/gpio_ll: Using 2x gpio_ll_toggle()\n" + "-----------------------------------------"); + gpio_conf_t conf = { + .state = GPIO_OUTPUT_PUSH_PULL, + .slew_rate = GPIO_SLEW_FASTEST + }; + expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf)); + expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf)); + + uint32_t start = ztimer_now(ZTIMER_USEC); + for (uint_fast16_t i = loops; i > 0; i--) { + gpio_ll_toggle(port_out, mask_both); + gpio_ll_toggle(port_out, mask_both); + } + uint32_t duration = ztimer_now(ZTIMER_USEC) - start; + + if (COMPENSATE_OVERHEAD) { + print_summary_compensated(loops, duration - loop_overhead, + duration); + } + else { + print_summary_uncompensated(loops, duration); + } + } + + { + puts("\n" + "periph/gpio: Using 4x gpio_write()\n" + "----------------------------------"); + gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0); + gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1); + gpio_init(p0, GPIO_OUT); + gpio_init(p1, 
GPIO_OUT); + + uint32_t start = ztimer_now(ZTIMER_USEC); + for (uint_fast16_t i = loops; i > 0; i--) { + gpio_write(p0, 1); + gpio_write(p1, 1); + gpio_write(p0, 0); + gpio_write(p1, 0); + } + uint32_t duration = ztimer_now(ZTIMER_USEC) - start; + + if (COMPENSATE_OVERHEAD) { + print_summary_compensated(loops, duration - loop_overhead, + duration); + } + else { + print_summary_uncompensated(loops, duration); + } + } + + { + puts("\n" + "periph/gpio_ll: Using 2x gpio_ll_write()\n" + "----------------------------------------"); + gpio_conf_t conf = { + .state = GPIO_OUTPUT_PUSH_PULL, + .slew_rate = GPIO_SLEW_FASTEST + }; + expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf)); + expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf)); + + uword_t both_high = gpio_ll_prepare_write(port_out, mask_both, + mask_both); + uword_t both_low = gpio_ll_prepare_write(port_out, mask_both, 0); + uint32_t start = ztimer_now(ZTIMER_USEC); + for (uint_fast16_t i = loops; i > 0; i--) { + gpio_ll_write(port_out, both_high); + gpio_ll_write(port_out, both_low); + } + uint32_t duration = ztimer_now(ZTIMER_USEC) - start; + + if (COMPENSATE_OVERHEAD) { + print_summary_compensated(loops, duration - loop_overhead, + duration); + } + else { + print_summary_uncompensated(loops, duration); + } + } + + puts("\n\nTEST SUCCEEDED"); + return 0; +} diff --git a/tests/bench_periph_gpio_ll/tests-with-config/01-run.py b/tests/bench_periph_gpio_ll/tests-with-config/01-run.py new file mode 100755 index 000000000000..500048e3bb0a --- /dev/null +++ b/tests/bench_periph_gpio_ll/tests-with-config/01-run.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022 Otto-von-Guericke-Universität Magdeburg +# +# This file is subject to the terms and conditions of the GNU Lesser +# General Public License v2.1. See the file LICENSE in the top level +# directory for more details. 
+
+import sys
+from testrunner import run
+
+
+def testfunc(child):  # child: presumably the handle supplied by testrunner.run()
+    child.expect('TEST SUCCEEDED')  # last line main.c prints after all benchmarks
+
+
+if __name__ == "__main__":
+    sys.exit(run(testfunc))  # exit status is whatever run() returns