Skip to content

Commit

Permalink
Merge pull request #9812 from kjbracey-arm/wait_ns
Browse files Browse the repository at this point in the history
Add wait_ns API
  • Loading branch information
Cruz Monrreal authored Mar 1, 2019
2 parents eff8b1d + d2df5a0 commit e6caa12
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 4 deletions.
96 changes: 96 additions & 0 deletions TESTS/mbed_platform/wait_ns/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright (c) 2018, ARM Limited, All Rights Reserved
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "mbed.h"
#include "greentea-client/test_env.h"
#include "unity.h"
#include "utest.h"
#include "platform/mbed_wait_api.h"
#include "hal/us_ticker_api.h"
#include "hal/lp_ticker_api.h"

using namespace utest::v1;

/* This test is based on the test for the Timer class.
* Since the low power timer is less accurate than the regular
* timer, we need to adjust the delta.
*/

/*
* Define tolerance as follows:
* Timer might be +/-5% out; wait_ns is permitted 40% slow, but not fast.
* Therefore minimum measured time should be 95% of requested, maximum should
* be 145%. Unity doesn't let us specify an asymmetric error though.
*
* To work around that, the asymmetric window [TOLERANCE_MIN, TOLERANCE_MAX]
* is re-expressed as a symmetric one: MIDPOINT is the centre of the window
* and DELTA is the distance from the centre to either edge, so that
* MIDPOINT - DELTA == TOLERANCE_MIN and MIDPOINT + DELTA == TOLERANCE_MAX.
* All four values are fractions of the requested delay and must be scaled
* by it before use.
*
* Would be nice to have tighter upper tolerance, but in practice we've seen
* a few devices unable to sustain theoretical throughput - flash wait states?
*/
/* Lower and upper bounds of the accepted measured/requested ratio. */
#define TOLERANCE_MIN 0.95f
#define TOLERANCE_MAX 1.45f
/* Centre of the accepted window (1.20 with the bounds above). */
#define MIDPOINT ((TOLERANCE_MIN+TOLERANCE_MAX)/2)
/* Half-width of the accepted window (0.25 with the bounds above). */
#define DELTA (MIDPOINT-TOLERANCE_MIN)

/* Checks that the delay produced by wait_ns() is accurate when measured
 * with an independent timer.
 *
 * Given a timer of type CompareTimer,
 * when it is used to time wait_val_ms milliseconds' worth of wait_ns() calls,
 * then the measured time lies inside the accepted tolerance window.
 */
template<int wait_val_ms, class CompareTimer>
void test_wait_ns_time_measurement()
{
    /* Requested delay expressed in seconds, for comparison with timer.read(). */
    const float requested_s = static_cast<float>(wait_val_ms) / 1000;

    CompareTimer reference_timer;

    /* Begin measuring. */
    reference_timer.start();

    /* Delay for <wait_val_ms> ms in 1 ms slices: the arithmetic inside
     * wait_ns will overflow if asked for too large a delay in one call.
     */
    int slices_left = wait_val_ms;
    while (slices_left > 0) {
        wait_ns(1000000);
        slices_left--;
    }

    /* End measuring. */
    reference_timer.stop();

    /* Check results - wait_val_ms ms should have elapsed, within
     * MIDPOINT +/- DELTA of the requested time.
     */
    const float allowed_error_s = DELTA * requested_s;
    const float window_centre_s = MIDPOINT * requested_s;
    TEST_ASSERT_FLOAT_WITHIN(allowed_error_s, window_centre_s, reference_timer.read());
}

/* Set-up handler for this test suite; it is handed to the Specification
* together with the case table.
*
* Issues GREENTEA_SETUP with the arguments 15 and "default_auto" (presumably
* the test timeout in seconds and the host test name - confirm against the
* greentea-client documentation), then delegates to utest's
* verbose_test_setup_handler and returns its status.
*
* @param number_of_cases number of test cases, forwarded unchanged to
*                        verbose_test_setup_handler
*/
utest::v1::status_t test_setup(const size_t number_of_cases)
{
GREENTEA_SETUP(15, "default_auto");
return verbose_test_setup_handler(number_of_cases);
}

/* Table of test cases. Each case times 1000 x 1 ms of wait_ns() against a
* different reference timer. The LowPowerTimer variant is only compiled in
* when DEVICE_LPTICKER is set.
*/
Case cases[] = {
#if DEVICE_LPTICKER
Case("Test: wait_ns - compare with lp_timer 1s", test_wait_ns_time_measurement<1000, LowPowerTimer>),
#endif
Case("Test: wait_ns - compare with us_timer 1s", test_wait_ns_time_measurement<1000, Timer>)
};

/* Binds the set-up handler and the case table into the specification run by main(). */
Specification specification(test_setup, cases);

/* Entry point: runs the test specification and turns the harness result
 * into the exit status (0 when Harness::run() reports success, 1 otherwise).
 */
int main()
{
    const bool harness_ok = Harness::run(specification);
    return harness_ok ? 0 : 1;
}
36 changes: 34 additions & 2 deletions platform/mbed_wait_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,43 @@ void wait_ms(int ms);
*
* @note
* This function always spins to get the exact number of microseconds.
* If RTOS is present, this will affect power (by preventing deep sleep) and
* multithread performance. Therefore, spinning for millisecond wait is not recommended.
* This will affect power and multithread performance. Therefore, spinning for
* millisecond wait is not recommended, and wait_ms() should
* be used instead.
*
* @note You may call this function from ISR context, but large delays may
* impact system stability - interrupt handlers should take less than
* 50us.
*/
void wait_us(int us);

/** Waits a number of nanoseconds.
*
* This function spins the CPU to produce a small delay. It should normally
* only be used for delays of 10us (10000ns) or less. As it is calculated
* based on the expected execution time of a software loop, it may well run
* slower than requested based on activity from other threads and interrupts.
* If greater precision is required, this can be called from inside a critical
* section.
*
* @param ns the number of nanoseconds to wait
*
* @note
* wait_us() will likely give more precise time than wait_ns for large-enough
* delays, as it is based on a timer, but its set-up time may be excessive
* for the smallest microsecond counts, at which point wait_ns() is better.
*
* @note
* Any delay larger than a millisecond (1000000ns) is liable to cause
* overflow in the internal loop calculation. You shouldn't normally be
* using this for such large delays anyway in real code, but be aware if
* calibrating. Make repeated calls for longer test runs.
*
* @note
* The implementation is only compiled for CPUs whose delay-loop timing is
* known (those for which LOOP_SCALER is defined in mbed_wait_api_no_rtos.c).
* On any other CPU the function is declared here but left undefined.
*
* @note You may call this function from ISR context.
*
*/
void wait_ns(unsigned int ns);

#ifdef __cplusplus
}
#endif
Expand Down
66 changes: 65 additions & 1 deletion platform/mbed_wait_api_no_rtos.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
* limitations under the License.
*/

#include "cmsis.h"
#include "platform/mbed_toolchain.h"
#include "platform/mbed_wait_api.h"

// This implementation of the wait functions will be compiled only
// if the RTOS is not present.
#ifndef MBED_CONF_RTOS_PRESENT

#include "platform/mbed_wait_api.h"
#include "hal/us_ticker_api.h"

void wait(float s)
Expand All @@ -41,3 +44,64 @@ void wait_us(int us)

#endif // #ifndef MBED_CONF_RTOS_PRESENT

// This wait_ns is used by both RTOS and non-RTOS builds

// LOOP_SCALER is the cost of one delay-loop iteration in CPU cycles,
// multiplied by 1000. wait_ns() divides (core cycles per microsecond * ns)
// by it, so the factor of 1000 performs the nanosecond-to-microsecond
// conversion in the same division. It is left undefined for any CPU not
// matched below.
#ifdef __CORTEX_M
#if (__CORTEX_M == 0 && !defined __CM0PLUS_REV) || __CORTEX_M == 1
// Cortex-M0 and Cortex-M1 take 6 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 3
#define LOOP_SCALER 6000
#elif (__CORTEX_M == 0 && defined __CM0PLUS_REV) || __CORTEX_M == 3 || __CORTEX_M == 4 || \
__CORTEX_M == 23 || __CORTEX_M == 33
// Cortex-M0+, M3, M4, M23 and M33 take 5 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 2
// TODO - check M33
#define LOOP_SCALER 5000
#elif __CORTEX_M == 7
// Cortex-M7 manages to dual-issue for 2 cycles per iteration (SUB,NOP) = 1, (NOP,BCS) = 1
// (The NOPs were added to stabilise this - with just the SUB and BCS, it seems that the
// M7 sometimes takes 1 cycle, sometimes 2, possibly depending on alignment)
#define LOOP_SCALER 2000
#endif
#elif defined __CORTEX_A
#if __CORTEX_A == 9
// Cortex-A9 is dual-issue, so let's assume same performance as Cortex-M7.
// TODO - test.
#define LOOP_SCALER 2000
#endif
#endif

/* We only define the function if we've identified the CPU. If we haven't,
* we leave it undefined rather than faulting with an immediate compile-time
* #error, so that a build only fails if wait_ns() is actually needed. This
* leaves the door open to non-ARM builds, or to people providing
* substitutes for other CPUs.
*/
#ifdef LOOP_SCALER

/* Timing seems to depend on alignment, and toolchains do not support aligning
* functions well. So sidestep that by hand-assembling the code. Also avoids
* the hassle of handling multiple toolchains with different assembler
* syntax.
*
* The array holds the opcodes of a small Thumb routine, placed on an 8-byte
* boundary by MBED_ALIGN(8): it decrements R0, executes two NOPs, branches
* back to the SUBS while the carry flag is set, and then returns via BX LR.
* It is called through the delay_loop macro; wait_ns() passes the iteration
* count as the only argument (presumably arriving in R0 under the ARM
* procedure call standard - confirm).
*/
MBED_ALIGN(8)
static const uint16_t delay_loop_code[] = {
0x1E40, // SUBS R0,R0,#1
0xBF00, // NOP
0xBF00, // NOP
0xD2FB, // BCS .-3 (0x00 would be .+2, so 0xFB = -5 = .-3)
0x4770 // BX LR
};

/* Take the address of the code, set LSB to indicate Thumb, and cast to void() function pointer.
* The pointer type has an empty parameter list, which is what allows the call
* site in wait_ns() to pass the loop count as an argument.
* NOTE(review): this relies on C's unprototyped-function semantics - it would
* not compile as C++.
*/
#define delay_loop ((void(*)()) ((uintptr_t) delay_loop_code | 1))

/* Spin for roughly ns nanoseconds by running the hand-assembled delay loop.
 *
 * The requested time is converted to a loop iteration count using the core
 * clock (SystemCoreClock) and the per-CPU LOOP_SCALER. All arithmetic is
 * 32-bit, so a large ns combined with a fast clock can overflow the
 * intermediate product.
 */
void wait_ns(unsigned int ns)
{
    // Core clock in whole cycles per microsecond (integer division).
    const uint32_t clocks_per_us = SystemCoreClock / 1000000;

    // This calculation and the call overhead themselves cost several cycles -
    // possibly 100ns in total - so the division deliberately rounds down:
    // the start-up cost is worth at least one loop iteration.
    const uint32_t iterations = (clocks_per_us * ns) / LOOP_SCALER;

    delay_loop(iterations);
}
#endif // LOOP_SCALER
2 changes: 1 addition & 1 deletion targets/TARGET_NUVOTON/TARGET_M2351/device/M2351.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ typedef enum IRQn
/*@}*/ /* end of group CMSIS */


#include "core_armv8mbl.h" /* Processor and core peripherals */
#include "core_cm23.h" /* Processor and core peripherals */
#include "system_M2351.h" /* System Header */

/**
Expand Down

0 comments on commit e6caa12

Please sign in to comment.