Skip to content

Commit

Permalink
tests/bench_periph_gpio_ll: add benchmark for periph/gpio_ll
Browse files Browse the repository at this point in the history
  • Loading branch information
maribu committed Apr 22, 2022
1 parent 22a1773 commit 28791c4
Show file tree
Hide file tree
Showing 6 changed files with 407 additions and 0 deletions.
5 changes: 5 additions & 0 deletions tests/bench_periph_gpio_ll/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Allow custom pin mapping in Makefile.$(BOARD) files, but those don't need to
# go upstream
/Makefile.*
# but un-ignore Makefile.ci
!/Makefile.ci
42 changes: 42 additions & 0 deletions tests/bench_periph_gpio_ll/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
BOARD ?= nucleo-f767zi

# Custom per-board pin configuration (e.g. for setting PORT_OUT, PIN_OUT_0,
# PIN_OUT_1) can be provided in a Makefile.$(BOARD) file. The leading "-" makes
# the include optional, so a missing file is not an error.
-include Makefile.$(BOARD)

# Choose two output pins that do not conflict with stdio and are not connected
# to external devices such as sensors, network devices, etc.
#
# Beware: If other pins on the output port are configured as output GPIOs, they
# might be written to during this test.
#
# The values below are only defaults ("?="); settings from Makefile.$(BOARD) or
# from the environment take precedence.
PORT_OUT ?= 0
PIN_OUT_0 ?= 0
PIN_OUT_1 ?= 1

include ../Makefile.tests_common

# The benchmark compares the pin based periph/gpio API (used as reference)
# against the port based periph/gpio_ll API, hence both are required.
FEATURES_REQUIRED += periph_gpio_ll
FEATURES_REQUIRED += periph_gpio
# NOTE(review): main.c does not appear to use any GPIO IRQ API; these optional
# features may not be needed by this benchmark - confirm.
FEATURES_OPTIONAL += periph_gpio_ll_irq
FEATURES_OPTIONAL += periph_gpio_ll_irq_level_triggered_high
FEATURES_OPTIONAL += periph_gpio_ll_irq_level_triggered_low

# Microsecond timer used by main.c to measure the duration of each benchmark
USEMODULE += ztimer_usec

include $(RIOTBASE)/Makefile.include

# Configure if compensation of loop overhead in the estimation of the
# toggling speed should be performed. Default: Do so, except for Cortex-M7.
# For the Cortex-M7 the loop instructions are emitted together with the GPIO
# writes due to the dual issue feature. Hence, there is no loop overhead for
# Cortex-M7 to compensate for.
#
# NOTE(review): presumably this is placed after Makefile.include so that
# CPU_CORE is already defined when it is evaluated - confirm.
ifeq (cortex-m7,$(CPU_CORE))
COMPENSATE_OVERHEAD ?= 0
endif

# Default for every other CPU core (no effect if already set above or by the
# user)
COMPENSATE_OVERHEAD ?= 1

# Hand the configuration over to main.c as preprocessor macros
CFLAGS += -DPORT_OUT=$(PORT_OUT)
CFLAGS += -DPIN_OUT_0=$(PIN_OUT_0)
CFLAGS += -DPIN_OUT_1=$(PIN_OUT_1)
CFLAGS += -DCOMPENSATE_OVERHEAD=$(COMPENSATE_OVERHEAD)
8 changes: 8 additions & 0 deletions tests/bench_periph_gpio_ll/Makefile.ci
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
BOARD_INSUFFICIENT_MEMORY := \
arduino-duemilanove \
arduino-nano \
arduino-uno \
atmega328p \
atmega328p-xplained-mini \
nucleo-l011k4 \
#
40 changes: 40 additions & 0 deletions tests/bench_periph_gpio_ll/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Benchmark for `periph/gpio_ll`

This application will generate a square wave on two output pins with a phase
difference of zero between them using both the `periph/gpio` API (as reference)
and the `periph/gpio_ll` API. You can use a logic analyzer or scope to verify
that the square waves are indeed simultaneous (no phase difference) and to
measure their frequency. Note that with the pin based `periph/gpio` API a phase
difference is expected, but not with the port based `periph/gpio_ll` API.

In addition, a timer is used to measure the average frequency over 50,000
square wave periods. The overhead of the loop is estimated and a compensated
frequency (that would be achievable only by unrolling the loop) is calculated.
Both frequencies are printed, in addition to the number of CPU cycles per wave
period. The optimal value is 2 CPU cycles (signal is 1 cycle high and 1 cycle
low).

## Configuration

Select the number of the GPIO port to use via the `PORT_OUT` variable, either
in the `Makefile` or as an environment variable. The `PIN_OUT_0` and
`PIN_OUT_1` variables select the pins to use within that GPIO port. If possible,
choose a GPIO port that is fully broken out to pins of your board but left
unconnected. That way you can connect a scope or a logic analyzer to verify
the output.

Note that the test using `gpio_ll_write()` might cause changes to unrelated pins
on the `PORT_OUT` GPIO port, by restoring their value to what it was at the
beginning of the benchmark.

## FAQ

Why are 4 function calls used for `periph/gpio`, but only 2 for
`periph/gpio_ll`? This isn't fair!

Since in a port based API multiple pins can be accessed at once, only two
accesses are needed (one for the high and one for the low part of each square
wave period). In the pin based `periph/gpio` API, two accesses are needed per
pin. This unfair advantage in speed is one of the reasons we want a low level
port based API in RIOT - in addition to a more convenient to use and high level
pin based API.
294 changes: 294 additions & 0 deletions tests/bench_periph_gpio_ll/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
/*
* Copyright (C) 2021 Otto-von-Guericke-Universität Magdeburg
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/

/**
* @ingroup tests
* @{
*
* @file
* @brief Benchmark application for the Peripheral GPIO Low-Level API
*
* @author Marian Buschsieweke <[email protected]>
*
* @}
*/

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "mutex.h"
#include "periph/gpio.h"
#include "periph/gpio_ll.h"
#include "test_utils/expect.h"
#include "ztimer.h"
#include "timex.h"

/* Whether the estimated loop overhead is subtracted from the measured
 * durations. The Makefile passes this in via CFLAGS; fall back to enabling the
 * compensation when the macro was not provided. */
#ifndef COMPENSATE_OVERHEAD
#define COMPENSATE_OVERHEAD 1
#endif

/* GPIO port handle used for all `periph/gpio_ll` accesses. PORT_OUT is the
 * port number the Makefile passes in via CFLAGS. */
static gpio_port_t port_out = GPIO_PORT(PORT_OUT);

#ifdef CLOCK_CORECLOCK
/**
 * @brief   Compute the number of CPU cycles spent per square wave period
 *
 * @param[in] loops         Number of square wave periods generated, must not
 *                          be zero
 * @param[in] duration_us   Time in microseconds the @p loops periods took
 *
 * @return  CPU cycles per period, rounded to the nearest integer
 */
static uint32_t cycles_per_period(uint_fast16_t loops, uint32_t duration_us)
{
    /* cycles = duration_us * f_cpu / (US_PER_SEC * loops)
     *
     * Multiply first and divide once: pre-computing
     * US_PER_SEC * loops / f_cpu as an integer truncates the divisor and
     * skews the result. The product fits into 64 bit for any 32 bit duration
     * and any core clock below ~4 GHz. */
    uint64_t us_times_periods = (uint64_t)US_PER_SEC * loops;
    uint64_t us_times_cycles = (uint64_t)duration_us * (uint64_t)CLOCK_CORECLOCK;

    return (uint32_t)((us_times_cycles + us_times_periods / 2)
                      / us_times_periods);
}

/**
 * @brief   Print a smiley rating the number of CPU cycles per period
 *
 * @param[in] cycles    CPU cycles per square wave period (2 is the optimum)
 */
static void print_rating(uint32_t cycles)
{
    if (cycles <= 2) {
        puts(":-D");
    }
    else if (cycles <= 4) {
        puts(":-)");
    }
    else if (cycles <= 8) {
        puts(":-|");
    }
    else if (cycles <= 16) {
        puts(":-(");
    }
    else {
        puts(":'-(");
    }
}
#endif /* CLOCK_CORECLOCK */

/**
 * @brief   Print the benchmark result with and without loop overhead
 *          compensation
 *
 * Prints the measured durations, the resulting square wave frequencies and,
 * if `CLOCK_CORECLOCK` is known, the CPU cycles per square wave period
 * followed by a smiley rating the compensated cycle count.
 *
 * @param[in] loops                     Number of square wave periods generated
 * @param[in] duration                  Duration in microseconds with the loop
 *                                      overhead already subtracted
 * @param[in] duration_uncompensated    Duration in microseconds as measured
 */
static void print_summary_compensated(uint_fast16_t loops, uint32_t duration,
                                      uint32_t duration_uncompensated)
{
    printf("%" PRIuFAST16 " iterations took %" PRIu32 " us "
           "(%" PRIu32 " us uncompensated)\n",
           loops, duration, duration_uncompensated);

    /* every computation below divides by one of these values */
    if ((loops == 0) || (duration == 0) || (duration_uncompensated == 0)) {
        puts("Measurement too short to compute frequency");
        return;
    }

    printf("Two square waves pins at %12" PRIu32 " Hz "
           "(%12" PRIu32 " Hz uncompensated)\n",
           (uint32_t)((uint64_t)US_PER_SEC * loops / duration),
           (uint32_t)((uint64_t)US_PER_SEC * loops / duration_uncompensated));
#ifdef CLOCK_CORECLOCK
    uint32_t cycles = cycles_per_period(loops, duration);
    uint32_t cycles_uncompensated = cycles_per_period(loops,
                                                      duration_uncompensated);
    printf("~%" PRIu32 " CPU cycles per square wave period "
           "(~%" PRIu32 " cycles uncompensated)\n",
           cycles, cycles_uncompensated);
    print_rating(cycles);
#endif
}

/**
 * @brief   Print the benchmark result without loop overhead compensation
 *
 * Prints the measured duration, the resulting square wave frequency and, if
 * `CLOCK_CORECLOCK` is known, the CPU cycles per square wave period followed
 * by a smiley rating the cycle count.
 *
 * @param[in] loops     Number of square wave periods generated
 * @param[in] duration  Duration in microseconds as measured
 */
static void print_summary_uncompensated(uint_fast16_t loops, uint32_t duration)
{
    printf("%" PRIuFAST16 " iterations took %" PRIu32 " us\n",
           loops, duration);

    /* every computation below divides by one of these values */
    if ((loops == 0) || (duration == 0)) {
        puts("Measurement too short to compute frequency");
        return;
    }

    printf("Two square waves pins at %12" PRIu32 " Hz\n",
           (uint32_t)((uint64_t)US_PER_SEC * loops / duration));
#ifdef CLOCK_CORECLOCK
    uint32_t cycles = cycles_per_period(loops, duration);
    printf("~%" PRIu32 " CPU cycles per square wave period\n", cycles);
    print_rating(cycles);
#endif
}

/**
 * @brief   Print the result of one benchmark run, compensated if possible
 *
 * The loop overhead is only subtracted when compensation is enabled and the
 * overhead estimate is smaller than the measured duration. Otherwise the
 * unsigned subtraction would wrap around (or yield zero), so the plain
 * uncompensated summary is printed instead.
 *
 * @param[in] loops         Number of square wave periods generated
 * @param[in] duration      Measured duration of the run in microseconds
 * @param[in] loop_overhead Estimated duration of the empty loop in
 *                          microseconds
 */
static void report_result(uint_fast16_t loops, uint32_t duration,
                          uint32_t loop_overhead)
{
    if (COMPENSATE_OVERHEAD && (duration > loop_overhead)) {
        print_summary_compensated(loops, duration - loop_overhead, duration);
    }
    else {
        print_summary_uncompensated(loops, duration);
    }
}

/**
 * @brief   Run all GPIO benchmarks and print their results
 *
 * Generates square waves on the pins PIN_OUT_0 and PIN_OUT_1 of port PORT_OUT
 * using set/clear, toggle and write, each once with the pin based
 * `periph/gpio` API and once with the port based `periph/gpio_ll` API. Every
 * variant is timed over a fixed number of loop iterations using ZTIMER_USEC.
 *
 * @return  0 after all benchmarks have completed
 */
int main(void)
{
    static const uint_fast16_t loops = 50000;
    uint32_t loop_overhead = 0;

    /* bitmask selecting both output pins on the port */
    uword_t mask_both = (1U << PIN_OUT_0) | (1U << PIN_OUT_1);

    puts("\n"
         "Benchmarking GPIO APIs\n"
         "======================");

    if (COMPENSATE_OVERHEAD) {
        puts("\n"
             "estimating loop overhead for compensation\n"
             "-----------------------------------------");
        uint32_t start = ztimer_now(ZTIMER_USEC);
        for (uint_fast16_t i = loops; i > 0; i--) {
            /* empty asm statement keeps the compiler from removing the
             * otherwise empty loop */
            __asm__ volatile ("" : : : );
        }
        loop_overhead = ztimer_now(ZTIMER_USEC) - start;
        printf("%" PRIu32 " us for %" PRIuFAST16 " iterations\n",
               loop_overhead, loops);
    }

    {
        puts("\n"
             "periph/gpio: Using 2x gpio_set() and 2x gpio_clear()\n"
             "---------------------------------------------------");
        gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0);
        gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1);
        /* a pin that failed to initialize would make the result meaningless */
        expect(0 == gpio_init(p0, GPIO_OUT));
        expect(0 == gpio_init(p1, GPIO_OUT));

        uint32_t start = ztimer_now(ZTIMER_USEC);
        for (uint_fast16_t i = loops; i > 0; i--) {
            gpio_set(p0);
            gpio_set(p1);
            gpio_clear(p0);
            gpio_clear(p1);
        }
        uint32_t duration = ztimer_now(ZTIMER_USEC) - start;

        report_result(loops, duration, loop_overhead);
    }

    {
        puts("\n"
             "periph/gpio_ll: Using gpio_ll_set() and gpio_ll_clear()\n"
             "-------------------------------------------------------");
        gpio_conf_t conf = {
            .state = GPIO_OUTPUT_PUSH_PULL,
            .slew_rate = GPIO_SLEW_FASTEST
        };
        expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf));
        expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf));

        uint32_t start = ztimer_now(ZTIMER_USEC);
        for (uint_fast16_t i = loops; i > 0; i--) {
            gpio_ll_set(port_out, (1UL << PIN_OUT_0) | (1UL << PIN_OUT_1));
            gpio_ll_clear(port_out, (1UL << PIN_OUT_0) | (1UL << PIN_OUT_1));
        }
        uint32_t duration = ztimer_now(ZTIMER_USEC) - start;

        report_result(loops, duration, loop_overhead);
    }

    {
        puts("\n"
             "periph/gpio: Using 4x gpio_toggle()\n"
             "-----------------------------------");
        gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0);
        gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1);
        expect(0 == gpio_init(p0, GPIO_OUT));
        expect(0 == gpio_init(p1, GPIO_OUT));

        uint32_t start = ztimer_now(ZTIMER_USEC);
        for (uint_fast16_t i = loops; i > 0; i--) {
            gpio_toggle(p0);
            gpio_toggle(p1);
            gpio_toggle(p0);
            gpio_toggle(p1);
        }
        uint32_t duration = ztimer_now(ZTIMER_USEC) - start;

        report_result(loops, duration, loop_overhead);
    }

    {
        puts("\n"
             "periph/gpio_ll: Using 2x gpio_ll_toggle()\n"
             "-----------------------------------------");
        gpio_conf_t conf = {
            .state = GPIO_OUTPUT_PUSH_PULL,
            .slew_rate = GPIO_SLEW_FASTEST
        };
        expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf));
        expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf));

        uint32_t start = ztimer_now(ZTIMER_USEC);
        for (uint_fast16_t i = loops; i > 0; i--) {
            gpio_ll_toggle(port_out, mask_both);
            gpio_ll_toggle(port_out, mask_both);
        }
        uint32_t duration = ztimer_now(ZTIMER_USEC) - start;

        report_result(loops, duration, loop_overhead);
    }

    {
        puts("\n"
             "periph/gpio: Using 4x gpio_write()\n"
             "----------------------------------");
        gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0);
        gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1);
        expect(0 == gpio_init(p0, GPIO_OUT));
        expect(0 == gpio_init(p1, GPIO_OUT));

        uint32_t start = ztimer_now(ZTIMER_USEC);
        for (uint_fast16_t i = loops; i > 0; i--) {
            gpio_write(p0, 1);
            gpio_write(p1, 1);
            gpio_write(p0, 0);
            gpio_write(p1, 0);
        }
        uint32_t duration = ztimer_now(ZTIMER_USEC) - start;

        report_result(loops, duration, loop_overhead);
    }

    {
        puts("\n"
             "periph/gpio_ll: Using 2x gpio_ll_write()\n"
             "----------------------------------------");
        gpio_conf_t conf = {
            .state = GPIO_OUTPUT_PUSH_PULL,
            .slew_rate = GPIO_SLEW_FASTEST
        };
        expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf));
        expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf));

        /* prepare the two port values outside of the timed loop, so that only
         * the gpio_ll_write() calls themselves are measured */
        uword_t both_high = gpio_ll_prepare_write(port_out, mask_both,
                                                  mask_both);
        uword_t both_low = gpio_ll_prepare_write(port_out, mask_both, 0);
        uint32_t start = ztimer_now(ZTIMER_USEC);
        for (uint_fast16_t i = loops; i > 0; i--) {
            gpio_ll_write(port_out, both_high);
            gpio_ll_write(port_out, both_low);
        }
        uint32_t duration = ztimer_now(ZTIMER_USEC) - start;

        report_result(loops, duration, loop_overhead);
    }

    puts("\n\nTEST SUCCEEDED");
    return 0;
}
Loading

0 comments on commit 28791c4

Please sign in to comment.