From 3b7b7bd3b356cd00d9b4986e6d0f36bfeba5bcb2 Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Fri, 5 Jan 2024 10:15:08 -0800 Subject: [PATCH 1/2] Enable RPi Pico's optimized ROM floating point routines --- targets/TARGET_RASPBERRYPI/CMakeLists.txt | 81 ++ .../src/rp2_common/hardware_divider/divider.S | 55 ++ .../include/hardware/divider.h | 404 +++++++++ .../include/hardware/divider_helper.S | 68 ++ .../src/rp2_common/pico_float/float_aeabi.S | 769 ++++++++++++++++++ .../rp2_common/pico_float/float_init_rom.c | 70 ++ .../src/rp2_common/pico_float/float_math.c | 582 +++++++++++++ .../src/rp2_common/pico_float/float_none.S | 78 ++ .../rp2_common/pico_float/float_v1_rom_shim.S | 346 ++++++++ .../pico_float/include/pico/float.h | 61 ++ .../pico_standard_link/binary_info.c | 95 --- .../src/rp2_common/pico_standard_link/crt0.S | 352 -------- .../src/rp2_common/pico_standard_link/doc.h | 10 - .../TARGET_RASPBERRYPI/reimport_pico_sdk.py | 2 + 14 files changed, 2516 insertions(+), 457 deletions(-) create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/divider.S create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider.h create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider_helper.S create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_aeabi.S create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_init_rom.c create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_math.c create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_none.S create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S create mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/include/pico/float.h delete mode 100644 
targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/binary_info.c delete mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/crt0.S delete mode 100644 targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/doc.h diff --git a/targets/TARGET_RASPBERRYPI/CMakeLists.txt b/targets/TARGET_RASPBERRYPI/CMakeLists.txt index 9f4a9f4441a..e07c82f0fd1 100644 --- a/targets/TARGET_RASPBERRYPI/CMakeLists.txt +++ b/targets/TARGET_RASPBERRYPI/CMakeLists.txt @@ -30,11 +30,83 @@ file(GENERATE CONTENT "${header_content}" ) +# add a link option to wrap the given function name; i.e. -Wl,--wrap=FUNCNAME for gcc +function(pico_wrap_function TARGET FUNCNAME) + target_link_options(${TARGET} INTERFACE "LINKER:--wrap=${FUNCNAME}") +endfunction() + +# Following is copied from src/rp2_common/pico_float/CMakeLists.txt +function(wrap_float_functions TARGET) + pico_wrap_function(${TARGET} __aeabi_fadd) + pico_wrap_function(${TARGET} __aeabi_fdiv) + pico_wrap_function(${TARGET} __aeabi_fmul) + pico_wrap_function(${TARGET} __aeabi_frsub) + pico_wrap_function(${TARGET} __aeabi_fsub) + pico_wrap_function(${TARGET} __aeabi_cfcmpeq) + pico_wrap_function(${TARGET} __aeabi_cfrcmple) + pico_wrap_function(${TARGET} __aeabi_cfcmple) + pico_wrap_function(${TARGET} __aeabi_fcmpeq) + pico_wrap_function(${TARGET} __aeabi_fcmplt) + pico_wrap_function(${TARGET} __aeabi_fcmple) + pico_wrap_function(${TARGET} __aeabi_fcmpge) + pico_wrap_function(${TARGET} __aeabi_fcmpgt) + pico_wrap_function(${TARGET} __aeabi_fcmpun) + pico_wrap_function(${TARGET} __aeabi_i2f) + pico_wrap_function(${TARGET} __aeabi_l2f) + pico_wrap_function(${TARGET} __aeabi_ui2f) + pico_wrap_function(${TARGET} __aeabi_ul2f) + pico_wrap_function(${TARGET} __aeabi_f2iz) + pico_wrap_function(${TARGET} __aeabi_f2lz) + pico_wrap_function(${TARGET} __aeabi_f2uiz) + pico_wrap_function(${TARGET} __aeabi_f2ulz) + pico_wrap_function(${TARGET} __aeabi_f2d) + pico_wrap_function(${TARGET} sqrtf) + 
pico_wrap_function(${TARGET} cosf) + pico_wrap_function(${TARGET} sinf) + pico_wrap_function(${TARGET} tanf) + pico_wrap_function(${TARGET} atan2f) + pico_wrap_function(${TARGET} expf) + pico_wrap_function(${TARGET} logf) + + pico_wrap_function(${TARGET} ldexpf) + pico_wrap_function(${TARGET} copysignf) + pico_wrap_function(${TARGET} truncf) + pico_wrap_function(${TARGET} floorf) + pico_wrap_function(${TARGET} ceilf) + pico_wrap_function(${TARGET} roundf) + pico_wrap_function(${TARGET} sincosf) # gnu + pico_wrap_function(${TARGET} asinf) + pico_wrap_function(${TARGET} acosf) + pico_wrap_function(${TARGET} atanf) + pico_wrap_function(${TARGET} sinhf) + pico_wrap_function(${TARGET} coshf) + pico_wrap_function(${TARGET} tanhf) + pico_wrap_function(${TARGET} asinhf) + pico_wrap_function(${TARGET} acoshf) + pico_wrap_function(${TARGET} atanhf) + pico_wrap_function(${TARGET} exp2f) + pico_wrap_function(${TARGET} log2f) + pico_wrap_function(${TARGET} exp10f) + pico_wrap_function(${TARGET} log10f) + pico_wrap_function(${TARGET} powf) + pico_wrap_function(${TARGET} powintf) #gnu + pico_wrap_function(${TARGET} hypotf) + pico_wrap_function(${TARGET} cbrtf) + pico_wrap_function(${TARGET} fmodf) + pico_wrap_function(${TARGET} dremf) + pico_wrap_function(${TARGET} remainderf) + pico_wrap_function(${TARGET} remquof) + pico_wrap_function(${TARGET} expm1f) + pico_wrap_function(${TARGET} log1pf) + pico_wrap_function(${TARGET} fmaf) +endfunction() + # Now, add includes and headers from the Pico SDK target_include_directories(mbed-raspberrypi INTERFACE . 
pico-sdk/src/rp2_common/hardware_adc/include + pico-sdk/src/rp2_common/hardware_divider/include pico-sdk/src/rp2_common/hardware_gpio/include pico-sdk/src/rp2_common/hardware_resets/include pico-sdk/src/rp2_common/hardware_pwm/include @@ -54,6 +126,7 @@ target_include_directories(mbed-raspberrypi pico-sdk/src/rp2_common/pico_platform/include pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/include/ pico-sdk/src/rp2_common/pico_bootrom/include + pico-sdk/src/rp2_common/pico_float/include pico-sdk/src/rp2_common/hardware_claim/include pico-sdk/src/common/pico_sync/include pico-sdk/src/common/pico_time/include @@ -89,6 +162,11 @@ target_sources(mbed-raspberrypi pico-sdk/src/common/pico_time/time.c pico-sdk/src/common/pico_sync/lock_core.c pico-sdk/src/rp2_common/cmsis/stub/CMSIS/Device/RaspberryPi/RP2040/Source/system_RP2040.c + pico-sdk/src/rp2_common/pico_float/float_aeabi.S + pico-sdk/src/rp2_common/pico_float/float_init_rom.c + pico-sdk/src/rp2_common/pico_float/float_math.c + pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S + pico-sdk/src/rp2_common/hardware_divider/divider.S ) target_compile_definitions(mbed-raspberrypi @@ -110,4 +188,7 @@ target_sources(mbed-rp2040 pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/rp2040_usb_device_enumeration.c ) +# Enable usage of the RPi Pico optimized floating point routines +wrap_float_functions(mbed-rp2040) + add_subdirectory(TARGET_RP2040 EXCLUDE_FROM_ALL) diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/divider.S b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/divider.S new file mode 100644 index 00000000000..9d90e24a937 --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/divider.S @@ -0,0 +1,55 @@ +#include "pico/asm_helper.S" +#include "hardware/regs/addressmap.h" +#include "hardware/regs/sio.h" + +pico_default_asm_setup + +// tag::hw_div_s32[] +regular_func_with_section hw_divider_divmod_s32 + 
ldr r3, =(SIO_BASE) + str r0, [r3, #SIO_DIV_SDIVIDEND_OFFSET] + str r1, [r3, #SIO_DIV_SDIVISOR_OFFSET] + b hw_divider_divmod_return +// end::hw_div_s32[] + +// tag::hw_div_u32[] +regular_func_with_section hw_divider_divmod_u32 + ldr r3, =(SIO_BASE) + str r0, [r3, #SIO_DIV_UDIVIDEND_OFFSET] + str r1, [r3, #SIO_DIV_UDIVISOR_OFFSET] + b hw_divider_divmod_return +// end::hw_div_u32[] + +// Common delay and return section for s32 and u32 +.section .text.hw_divider_divmod_return +hw_divider_divmod_return: + // Branching here is 2 cycles, delay another 6 + b 1f +1: b 1f +1: b 1f +1: // return 64 bit value so we can efficiently return both (note quotient must be read last) + ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET] + ldr r0, [r3, #SIO_DIV_QUOTIENT_OFFSET] + bx lr + +regular_func_with_section hw_divider_save_state + ldr r3, =SIO_BASE + ldr r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET] + ldr r2, [r3, #SIO_DIV_UDIVISOR_OFFSET] + stmia r0!, {r1-r2} + // The 8 cycles needed to guarantee that the result is ready are ensured by the preceding + // code of 7 cycles together with any branch to it taking at least 2 cycles. + ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET] + ldr r2, [r3, #SIO_DIV_QUOTIENT_OFFSET] + stmia r0!, {r1-r2} + bx lr + +regular_func_with_section hw_divider_restore_state + ldr r3, =SIO_BASE + ldmia r0!, {r1-r2} + str r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET] + str r2, [r3, #SIO_DIV_UDIVISOR_OFFSET] + ldmia r0!, {r1-r2} + str r1, [r3, #SIO_DIV_REMAINDER_OFFSET] + str r2, [r3, #SIO_DIV_QUOTIENT_OFFSET] + bx lr diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider.h b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider.h new file mode 100644 index 00000000000..081034aac9f --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider.h @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _HARDWARE_DIVIDER_H +#define _HARDWARE_DIVIDER_H + +#include "pico.h" +#include "hardware/structs/sio.h" + +/** \file hardware/divider.h + * \defgroup hardware_divider hardware_divider + * + * Low-level hardware-divider access + * + * The SIO contains an 8-cycle signed/unsigned divide/modulo circuit, per core. Calculation is started by writing a dividend + * and divisor to the two argument registers, DIVIDEND and DIVISOR. The divider calculates the quotient / and remainder % of + * this division over the next 8 cycles, and on the 9th cycle the results can be read from the two result registers + * DIV_QUOTIENT and DIV_REMAINDER. A 'ready' bit in register DIV_CSR can be polled to wait for the calculation to + * complete, or software can insert a fixed 8-cycle delay + * + * This header provides low level macros and inline functions for accessing the hardware dividers directly, + * and perhaps most usefully performing asynchronous divides. These functions however do not follow the regular + * SDK conventions for saving/restoring the divider state, so are not generally safe to call from interrupt handlers + * + * The pico_divider library provides a more user friendly set of APIs over the divider (and support for + * 64 bit divides), and of course by default regular C language integer divisions are redirected through that library, meaning + * you can just use C level `/` and `%` operators and gain the benefits of the fast hardware divider. + * + * @see pico_divider + * + * \subsection divider_example Example + * \addtogroup hardware_divider + * \include hello_divider.c + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint64_t divmod_result_t; + +/*! \brief Start a signed asynchronous divide + * \ingroup hardware_divider + * + * Start a divide of the specified signed parameters. 
You should wait for 8 cycles (hw_divider_pause()) or wait for the ready bit to be set + * (hw_divider_wait_ready()) prior to reading the results. + * + * \param a The dividend + * \param b The divisor + */ +static inline void hw_divider_divmod_s32_start(int32_t a, int32_t b) { + check_hw_layout( sio_hw_t, div_sdividend, SIO_DIV_SDIVIDEND_OFFSET); + sio_hw->div_sdividend = (uint32_t)a; + sio_hw->div_sdivisor = (uint32_t)b; +} + +/*! \brief Start an unsigned asynchronous divide + * \ingroup hardware_divider + * + * Start a divide of the specified unsigned parameters. You should wait for 8 cycles (hw_divider_pause()) or wait for the ready bit to be set + * (hw_divider_wait_ready()) prior to reading the results. + * + * \param a The dividend + * \param b The divisor + */ +static inline void hw_divider_divmod_u32_start(uint32_t a, uint32_t b) { + check_hw_layout( + sio_hw_t, div_udividend, SIO_DIV_UDIVIDEND_OFFSET); + sio_hw->div_udividend = a; + sio_hw->div_udivisor = b; +} + +/*! \brief Wait for a divide to complete + * \ingroup hardware_divider + * + * Wait for a divide to complete + */ +static inline void hw_divider_wait_ready(void) { + // this is #1 in lsr below + static_assert(SIO_DIV_CSR_READY_BITS == 1, ""); + + // we use one less register and instruction than gcc which uses a TST instruction + + uint32_t tmp; // allow compiler to pick scratch register + pico_default_asm_volatile ( + "hw_divider_result_loop_%=:" + "ldr %0, [%1, %2]\n\t" + "lsrs %0, %0, #1\n\t" + "bcc hw_divider_result_loop_%=\n\t" + : "=&l" (tmp) + : "l" (sio_hw), "I" (SIO_DIV_CSR_OFFSET) + : + ); +} + +/*! \brief Return result of HW divide, nowait + * \ingroup hardware_divider + * + * \note This is UNSAFE in that the calculation may not have been completed. + * + * \return Current result. Most significant 32 bits are the remainder, lower 32 bits are the quotient. 
+ */ +static inline divmod_result_t hw_divider_result_nowait(void) { + // as ugly as this looks it is actually quite efficient + divmod_result_t rc = ((divmod_result_t) sio_hw->div_remainder) << 32u; + rc |= sio_hw->div_quotient; + return rc; +} + +/*! \brief Return result of last asynchronous HW divide + * \ingroup hardware_divider + * + * This function waits for the result to be ready by calling hw_divider_wait_ready(). + * + * \return Current result. Most significant 32 bits are the remainder, lower 32 bits are the quotient. + */ +static inline divmod_result_t hw_divider_result_wait(void) { + hw_divider_wait_ready(); + return hw_divider_result_nowait(); +} + +/*! \brief Return result of last asynchronous HW divide, unsigned quotient only + * \ingroup hardware_divider + * + * This function waits for the result to be ready by calling hw_divider_wait_ready(). + * + * \return Current unsigned quotient result. + */ +static inline uint32_t hw_divider_u32_quotient_wait(void) { + hw_divider_wait_ready(); + return sio_hw->div_quotient; +} + +/*! \brief Return result of last asynchronous HW divide, signed quotient only + * \ingroup hardware_divider + * + * This function waits for the result to be ready by calling hw_divider_wait_ready(). + * + * \return Current signed quotient result. + */ +static inline int32_t hw_divider_s32_quotient_wait(void) { + hw_divider_wait_ready(); + return (int32_t)sio_hw->div_quotient; +} + +/*! \brief Return result of last asynchronous HW divide, unsigned remainder only + * \ingroup hardware_divider + * + * This function waits for the result to be ready by calling hw_divider_wait_ready(). + * + * \return Current unsigned remainder result. + */ +static inline uint32_t hw_divider_u32_remainder_wait(void) { + hw_divider_wait_ready(); + uint32_t rc = sio_hw->div_remainder; + sio_hw->div_quotient; // must read quotient to cooperate with other SDK code + return rc; +} + +/*! 
\brief Return result of last asynchronous HW divide, signed remainder only + * \ingroup hardware_divider + * + * This function waits for the result to be ready by calling hw_divider_wait_ready(). + * + * \return Current signed remainder result. + */ +static inline int32_t hw_divider_s32_remainder_wait(void) { + hw_divider_wait_ready(); + int32_t rc = (int32_t)sio_hw->div_remainder; + sio_hw->div_quotient; // must read quotient to cooperate with other SDK code + return rc; +} + +/*! \brief Do a signed HW divide and wait for result + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return result as a pair of 32-bit quotient/remainder values. + * + * \param a The dividend + * \param b The divisor + * \return Results of divide as a pair of 32-bit quotient/remainder values. + */ +divmod_result_t hw_divider_divmod_s32(int32_t a, int32_t b); + +/*! \brief Do an unsigned HW divide and wait for result + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return result as a pair of 32-bit quotient/remainder values. + * + * \param a The dividend + * \param b The divisor + * \return Results of divide as a pair of 32-bit quotient/remainder values. + */ +divmod_result_t hw_divider_divmod_u32(uint32_t a, uint32_t b); + +/*! \brief Efficient extraction of unsigned quotient from 32p32 fixed point + * \ingroup hardware_divider + * + * \param r A pair of 32-bit quotient/remainder values. + * \return Unsigned quotient + */ +inline static uint32_t to_quotient_u32(divmod_result_t r) { + return (uint32_t) r; +} + +/*! \brief Efficient extraction of signed quotient from 32p32 fixed point + * \ingroup hardware_divider + * + * \param r A pair of 32-bit quotient/remainder values. + * \return Signed quotient + */ +inline static int32_t to_quotient_s32(divmod_result_t r) { + return (int32_t)(uint32_t)r; +} + +/*! 
\brief Efficient extraction of unsigned remainder from 32p32 fixed point + * \ingroup hardware_divider + * + * \param r A pair of 32-bit quotient/remainder values. + * \return Unsigned remainder + * + * \note On Arm this is just a 32 bit register move or a nop + */ +inline static uint32_t to_remainder_u32(divmod_result_t r) { + return (uint32_t)(r >> 32u); +} + +/*! \brief Efficient extraction of signed remainder from 32p32 fixed point + * \ingroup hardware_divider + * + * \param r A pair of 32-bit quotient/remainder values. + * \return Signed remainder + * + * \note On arm this is just a 32 bit register move or a nop + */ +inline static int32_t to_remainder_s32(divmod_result_t r) { + return (int32_t)(r >> 32u); +} + +/*! \brief Do an unsigned HW divide, wait for result, return quotient + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return quotient. + * + * \param a The dividend + * \param b The divisor + * \return Quotient results of the divide + */ +static inline uint32_t hw_divider_u32_quotient(uint32_t a, uint32_t b) { + return to_quotient_u32(hw_divider_divmod_u32(a, b)); +} + +/*! \brief Do an unsigned HW divide, wait for result, return remainder + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return remainder. + * + * \param a The dividend + * \param b The divisor + * \return Remainder results of the divide + */ +static inline uint32_t hw_divider_u32_remainder(uint32_t a, uint32_t b) { + return to_remainder_u32(hw_divider_divmod_u32(a, b)); +} + +/*! \brief Do a signed HW divide, wait for result, return quotient + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return quotient. + * + * \param a The dividend + * \param b The divisor + * \return Quotient results of the divide + */ +static inline int32_t hw_divider_quotient_s32(int32_t a, int32_t b) { + return to_quotient_s32(hw_divider_divmod_s32(a, b)); +} + +/*! 
\brief Do a signed HW divide, wait for result, return remainder + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return remainder. + * + * \param a The dividend + * \param b The divisor + * \return Remainder results of the divide + */ +static inline int32_t hw_divider_remainder_s32(int32_t a, int32_t b) { + return to_remainder_s32(hw_divider_divmod_s32(a, b)); +} + +/*! \brief Pause for the exact amount of time needed for an asynchronous divide to complete + * \ingroup hardware_divider + */ +static inline void hw_divider_pause(void) { + pico_default_asm_volatile( + "b _1_%=\n" + "_1_%=:\n" + "b _2_%=\n" + "_2_%=:\n" + "b _3_%=\n" + "_3_%=:\n" + "b _4_%=\n" + "_4_%=:\n" + :::); +} + +/*! \brief Do a hardware unsigned HW divide, wait for result, return quotient + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return quotient. + * + * \param a The dividend + * \param b The divisor + * \return Quotient result of the divide + */ +static inline uint32_t hw_divider_u32_quotient_inlined(uint32_t a, uint32_t b) { + hw_divider_divmod_u32_start(a, b); + hw_divider_pause(); + return sio_hw->div_quotient; +} + +/*! \brief Do a hardware unsigned HW divide, wait for result, return remainder + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return remainder. + * + * \param a The dividend + * \param b The divisor + * \return Remainder result of the divide + */ +static inline uint32_t hw_divider_u32_remainder_inlined(uint32_t a, uint32_t b) { + hw_divider_divmod_u32_start(a, b); + hw_divider_pause(); + uint32_t rc = sio_hw->div_remainder; + sio_hw->div_quotient; // must read quotient to cooperate with other SDK code + return rc; +} + +/*! \brief Do a hardware signed HW divide, wait for result, return quotient + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return quotient. 
+ * + * \param a The dividend + * \param b The divisor + * \return Quotient result of the divide + */ +static inline int32_t hw_divider_s32_quotient_inlined(int32_t a, int32_t b) { + hw_divider_divmod_s32_start(a, b); + hw_divider_pause(); + return (int32_t)sio_hw->div_quotient; +} + +/*! \brief Do a hardware signed HW divide, wait for result, return remainder + * \ingroup hardware_divider + * + * Divide \p a by \p b, wait for calculation to complete, return remainder. + * + * \param a The dividend + * \param b The divisor + * \return Remainder result of the divide + */ +static inline int32_t hw_divider_s32_remainder_inlined(int32_t a, int32_t b) { + hw_divider_divmod_s32_start(a, b); + hw_divider_pause(); + int32_t rc = (int32_t)sio_hw->div_remainder; + sio_hw->div_quotient; // must read quotient to cooperate with other SDK code + return rc; +} + +typedef struct { + uint32_t values[4]; +} hw_divider_state_t; + +/*! \brief Save the calling cores hardware divider state + * \ingroup hardware_divider + * + * Copy the current core's hardware divider state into the provided structure. This method + * waits for the divider results to be stable, then copies them to memory. + * They can be restored via hw_divider_restore_state() + * + * \param dest the location to store the divider state + */ +void hw_divider_save_state(hw_divider_state_t *dest); + +/*! \brief Load a saved hardware divider state into the current core's hardware divider + * \ingroup hardware_divider + * + * Copy the passed hardware divider state into the hardware divider. 
+ * + * \param src the location to load the divider state from + */ + +void hw_divider_restore_state(hw_divider_state_t *src); + +#ifdef __cplusplus +} +#endif + +#endif // _HARDWARE_DIVIDER_H diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider_helper.S b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider_helper.S new file mode 100644 index 00000000000..ff4ed919bdb --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider_helper.S @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +// Note this file is always included by another, so does not do pico_default_asm_setup +#include "hardware/regs/addressmap.h" +#include "hardware/regs/sio.h" + +#if SIO_DIV_CSR_READY_LSB == 0 +.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1 +#else +need to change SHIFT above +#endif +#if SIO_DIV_CSR_DIRTY_LSB == 1 +.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2 +#else +need to change SHIFT above +#endif + +// SIO_BASE ptr in r2; pushes r4-r7, lr to stack +.macro save_div_state_and_lr +// originally we did this, however a) it uses r3, and b) the push and dividend/divisor +// readout takes 8 cycles, c) any IRQ which uses the divider will necessarily put the +// data back, which will immediately make it ready +// +// // ldr r3, [r2, #SIO_DIV_CSR_OFFSET] +// // // wait for results as we can't save signed-ness of operation +// // 1: +// // lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY +// // bcc 1b + +// 6 cycle push + 2 ldr ensures the 8 cycle delay before remainder and quotient are ready +push {r4, r5, r6, r7, lr} +// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia! 
+ldr r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET] +ldr r5, [r2, #SIO_DIV_UDIVISOR_OFFSET] +ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET] +ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET] +.endm + +// restores divider state from r4-r7, then pops them and pc +.macro restore_div_state_and_return +// writing udividend (r4), udivisor (r5), remainder (r7), quotient (r6) in that order +// +// it is worth considering what happens if we are interrupted +// +// after writing r4: we are DIRTY and !READY +// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be +// saved/restored correctly and we'll restore the rest ourselves +// after writing r4, r5: we are DIRTY and !READY +// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor +// at least will be saved/restored correctly and we'll restore the rest ourselves +// after writing r4, r5, r7: we are DIRTY and READY +// ... interruptor using div will save/restore the dividend, divisor, remainder registers as is (what we just restored ourselves), +// and we'll restore the quotient after the fact + +// note we do not use STM, not because it can be restarted due to an interrupt (which is harmless), but because this is 1 cycle IO space +// and so 4 writes are cheaper (and we don't have to adjust r2) +// note also, that we must restore via UDIVI* rather than SDIVI* to prevent the quotient/remainder being negated on read based +// on the signs of the inputs +str r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET] +str r5, [r2, #SIO_DIV_UDIVISOR_OFFSET] +str r7, [r2, #SIO_DIV_REMAINDER_OFFSET] +str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET] +pop {r4, r5, r6, r7, pc} +.endm \ No newline at end of file diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_aeabi.S b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_aeabi.S new file mode 100644 index 00000000000..e2862b5ce50 --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_aeabi.S 
@@ -0,0 +1,769 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "pico/asm_helper.S" +#include "pico/bootrom/sf_table.h" +#include "hardware/divider_helper.S" + +__pre_init __aeabi_float_init, 00020 + +pico_default_asm_setup + +.macro float_section name +#if PICO_FLOAT_IN_RAM +.section RAM_SECTION_NAME(\name), "ax" +#else +.section SECTION_NAME(\name), "ax" +#endif +.endm + +.macro float_wrapper_section func +float_section WRAPPER_FUNC_NAME(\func) +.endm + +.macro _float_wrapper_func x + wrapper_func \x +.endm + +.macro wrapper_func_f1 x + _float_wrapper_func \x +#if PICO_FLOAT_PROPAGATE_NANS + mov ip, lr + bl __check_nan_f1 + mov lr, ip +#endif +.endm + +.macro wrapper_func_f2 x + _float_wrapper_func \x +#if PICO_FLOAT_PROPAGATE_NANS + mov ip, lr + bl __check_nan_f2 + mov lr, ip +#endif +.endm + +.section .text + +#if PICO_FLOAT_PROPAGATE_NANS +.thumb_func +__check_nan_f1: + movs r3, #1 + lsls r3, #24 + lsls r2, r0, #1 + adds r2, r3 + bhi 1f + bx lr +1: + bx ip + +.thumb_func +__check_nan_f2: + movs r3, #1 + lsls r3, #24 + lsls r2, r0, #1 + adds r2, r3 + bhi 1f + lsls r2, r1, #1 + adds r2, r3 + bhi 2f + bx lr +2: + mov r0, r1 +1: + bx ip +#endif + +.macro table_tail_call SF_TABLE_OFFSET +#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED +#ifndef NDEBUG + movs r3, #0 + mov ip, r3 +#endif +#endif + ldr r3, =sf_table + ldr r3, [r3, #\SF_TABLE_OFFSET] + bx r3 +.endm + +.macro shimmable_table_tail_call SF_TABLE_OFFSET shim + ldr r3, =sf_table + ldr r3, [r3, #\SF_TABLE_OFFSET] +#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED + mov ip, pc +#endif + bx r3 +#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED +.byte \SF_TABLE_OFFSET, 0xdf +.word \shim +#endif +.endm + + +// note generally each function is in a separate section unless there is fall thru or branching between them +// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can 
share constant pool + +// note functions are word aligned except where they are an odd number of linear instructions + +// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition +float_wrapper_section __aeabi_farithmetic +// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x + +// frsub first because it is the only one that needs alignment +.align 2 +wrapper_func __aeabi_frsub + eors r0, r1 + eors r1, r0 + eors r0, r1 + // fall thru + +// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y +wrapper_func_f2 __aeabi_fsub +#if PICO_FLOAT_PROPAGATE_NANS + // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost + mov r2, r0 + eors r2, r1 + bmi 1f // different signs + push {r0, r1, lr} + bl 1f + b fdiv_fsub_nan_helper +1: +#endif + table_tail_call SF_TABLE_FSUB + +wrapper_func_f2 __aeabi_fadd + table_tail_call SF_TABLE_FADD + +// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d +wrapper_func_f2 __aeabi_fdiv +#if PICO_FLOAT_PROPAGATE_NANS + push {r0, r1, lr} + bl 1f + b fdiv_fsub_nan_helper +1: +#endif +#if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty + ldr r2, =(SIO_BASE) + ldr r3, [r2, #SIO_DIV_CSR_OFFSET] + lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY + bcs fdiv_save_state +#else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call + push {r4, lr} + mrs r4, PRIMASK + cpsid i + bl fdiv_shim_call + msr PRIMASK, r4 + pop {r4, pc} +#endif +fdiv_shim_call: + table_tail_call SF_TABLE_FDIV +#if !PICO_DIVIDER_DISABLE_INTERRUPTS +fdiv_save_state: + save_div_state_and_lr + bl fdiv_shim_call + ldr r2, =(SIO_BASE) + restore_div_state_and_return +#endif + +fdiv_fsub_nan_helper: +#if PICO_FLOAT_PROPAGATE_NANS + pop {r1, r2} + + // check for infinite op infinite (or rather check for infinite 
result with both + // operands being infinite) + lsls r3, r0, #1 + asrs r3, r3, #24 + adds r3, #1 + beq 2f + pop {pc} +2: + lsls r1, #1 + asrs r1, r1, #24 + lsls r2, #1 + asrs r2, r2, #24 + ands r1, r2 + adds r1, #1 + bne 3f + // infinite to nan + movs r1, #1 + lsls r1, #22 + orrs r0, r1 +3: + pop {pc} +#endif + +// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication +wrapper_func_f2 __aeabi_fmul +#if PICO_FLOAT_PROPAGATE_NANS + push {r0, r1, lr} + bl 1f + pop {r1, r2} + + // check for multiplication of infinite by zero (or rather check for infinite result with either + // operand 0) + lsls r3, r0, #1 + asrs r3, r3, #24 + adds r3, #1 + beq 2f + pop {pc} +2: + ands r1, r2 + bne 3f + // infinite to nan + movs r1, #1 + lsls r1, #22 + orrs r0, r1 +3: + pop {pc} +1: +#endif + table_tail_call SF_TABLE_FMUL + +// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags +float_wrapper_section __aeabi_cfcmple +.align 2 +wrapper_func __aeabi_cfrcmple + push {r0-r2, lr} + eors r0, r1 + eors r1, r0 + eors r0, r1 + b __aeabi_cfcmple_guts + +// NOTE these share an implementation as we have no excepting NaNs. +// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags +// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags +.align 2 +wrapper_func __aeabi_cfcmple +wrapper_func __aeabi_cfcmpeq + push {r0-r2, lr} + +__aeabi_cfcmple_guts: + lsls r2,r0,#1 + lsrs r2,#24 + beq 1f + cmp r2,#0xff + bne 2f + lsls r2, r0, #9 + bhi 3f +1: + lsrs r0,#23 @ clear mantissa if denormal or infinite + lsls r0,#23 +2: + lsls r2,r1,#1 + lsrs r2,#24 + beq 1f + cmp r2,#0xff + bne 2f + lsls r2, r1, #9 + bhi 3f +1: + lsrs r1,#23 @ clear mantissa if denormal or infinite + lsls r1,#23 +2: + movs r2,#1 @ initialise result + eors r1,r0 + bmi 2f @ opposite signs? 
then can proceed on basis of sign of x + eors r1,r0 @ restore y + bpl 1f + cmp r1,r0 + pop {r0-r2, pc} +1: + cmp r0,r1 + pop {r0-r2, pc} +2: + orrs r1, r0 @ handle 0/-0 + adds r1, r1 @ note this always sets C + beq 3f + mvns r0, r0 @ carry inverse of r0 sign + adds r0, r0 +3: + pop {r0-r2, pc} + + +// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and != +float_wrapper_section __aeabi_fcmpeq +.align 2 +wrapper_func __aeabi_fcmpeq + push {lr} + bl __aeabi_cfcmpeq + beq 1f + movs r0, #0 + pop {pc} +1: + movs r0, #1 + pop {pc} + +// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C < +float_wrapper_section __aeabi_fcmplt +.align 2 +wrapper_func __aeabi_fcmplt + push {lr} + bl __aeabi_cfcmple + sbcs r0, r0 + pop {pc} + +// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <= +float_wrapper_section __aeabi_fcmple +.align 2 +wrapper_func __aeabi_fcmple + push {lr} + bl __aeabi_cfcmple + bls 1f + movs r0, #0 + pop {pc} +1: + movs r0, #1 + pop {pc} + +// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >= +float_wrapper_section __aeabi_fcmpge +.align 2 +wrapper_func __aeabi_fcmpge + push {lr} + // because of NaNs it is better to reverse the args than the result + bl __aeabi_cfrcmple + bls 1f + movs r0, #0 + pop {pc} +1: + movs r0, #1 + pop {pc} + +// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C > +float_wrapper_section __aeabi_fcmpgt +wrapper_func __aeabi_fcmpgt + push {lr} + // because of NaNs it is better to reverse the args than the result + bl __aeabi_cfrcmple + sbcs r0, r0 + pop {pc} + +// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered() +float_wrapper_section __aeabi_fcmpun +wrapper_func __aeabi_fcmpun + movs r3, #1 + lsls r3, #24 + lsls r2, r0, #1 + adds r2, r3 + bhi 1f + lsls r2, r1, #1 + adds r2, r3 
+ bhi 1f + movs r0, #0 + bx lr +1: + movs r0, #1 + bx lr + + +// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion +float_wrapper_section __aeabi_ui2f +wrapper_func __aeabi_ui2f + subs r1, r1 + cmp r0, #0 + bne __aeabi_i2f_main + mov r0, r1 + bx lr + +float_wrapper_section __aeabi_i2f +// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion +wrapper_func __aeabi_i2f + lsrs r1, r0, #31 + lsls r1, #31 + bpl 1f + negs r0, r0 +1: + cmp r0, #0 + beq 7f +__aeabi_i2f_main: + + mov ip, lr + push {r0, r1} + ldr r3, =sf_clz_func + ldr r3, [r3] + blx r3 + pop {r1, r2} + lsls r1, r0 + subs r0, #158 + negs r0, r0 + + adds r1,#0x80 @ rounding + bcs 5f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits) + + lsls r3,r1,#24 @ check bottom 8 bits of r1 + beq 6f @ in rounding-tie case? + lsls r1,#1 @ remove leading 1 +3: + lsrs r1,#9 @ align mantissa + lsls r0,#23 @ align exponent + orrs r0,r2 @ assemble exponent and mantissa +4: + orrs r0,r1 @ apply sign +1: + bx ip +5: + adds r0,#1 @ correct exponent offset + b 3b +6: + lsrs r1,#9 @ ensure even result + lsls r1,#10 + b 3b +7: + bx lr + + +// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3] +float_wrapper_section __aeabi_f2iz +wrapper_func __aeabi_f2iz +regular_func float2int_z + lsls r1, r0, #1 + lsrs r2, r1, #24 + movs r3, #0x80 + lsls r3, #24 + cmp r2, #126 + ble 1f + subs r2, #158 + bge 2f + asrs r1, r0, #31 + lsls r0, #9 + lsrs r0, #1 + orrs r0, r3 + negs r2, r2 + lsrs r0, r2 + lsls r1, #1 + adds r1, #1 + muls r0, r1 + bx lr +1: + movs r0, #0 + bx lr +2: + lsrs r0, #31 + adds r0, r3 + subs r0, #1 + bx lr + + cmn r0, r0 + bcc float2int + push {lr} + lsls r0, #1 + lsrs r0, #1 + movs r1, #0 + bl __aeabi_f2uiz + cmp r0, #0 + bmi 1f + negs r0, r0 + pop {pc} +1: + movs r0, #128 + lsls r0, #24 + pop {pc} + +float_section float2int +regular_func float2int + 
shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim + +float_section float2fix +regular_func float2fix + shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim + +float_section float2ufix +regular_func float2ufix + table_tail_call SF_TABLE_FLOAT2UFIX + +// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3] +float_wrapper_section __aeabi_f2uiz +wrapper_func __aeabi_f2uiz + table_tail_call SF_TABLE_FLOAT2UINT + +float_section fix2float +regular_func fix2float + table_tail_call SF_TABLE_FIX2FLOAT + +float_section ufix2float +regular_func ufix2float + table_tail_call SF_TABLE_UFIX2FLOAT + +float_section fix642float +regular_func fix642float + shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim + +float_section ufix642float +regular_func ufix642float + shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim + +// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion +float_wrapper_section __aeabi_l2f +1: + ldr r2, =__aeabi_i2f + bx r2 +wrapper_func __aeabi_l2f + asrs r2, r0, #31 + cmp r1, r2 + beq 1b + shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim + +// float FUNC_NAME(__aeabi_ul2f)(unsigned long long) unsigned long long to float (single precision) conversion +float_wrapper_section __aeabi_ul2f +1: + ldr r2, =__aeabi_ui2f + bx r2 +wrapper_func __aeabi_ul2f + cmp r1, #0 + beq 1b + shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim + +// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3] +float_wrapper_section __aeabi_f2lz +wrapper_func __aeabi_f2lz +regular_func float2int64_z + cmn r0, r0 + bcc float2int64 + push {lr} + lsls r0, #1 + lsrs r0, #1 + movs r1, #0 + bl float2ufix64 + cmp r1, #0 + bmi 1f + movs r2, #0 + negs r0, r0 + sbcs r2, r1 + mov r1, r2 + pop {pc} +1: + movs r1, #128 + lsls r1, #24 + movs r0, #0 + pop {pc} + +float_section float2int64 +regular_func float2int64 +
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim + +float_section float2fix64 +regular_func float2fix64 + shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim + +// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3] +float_wrapper_section __aeabi_f2ulz +wrapper_func __aeabi_f2ulz + shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim + +float_section float2ufix64 +regular_func float2ufix64 + shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim + +float_wrapper_section __aeabi_f2d +1: +#if PICO_FLOAT_PROPAGATE_NANS + // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit + asrs r1, r0, #3 + movs r2, #0xf + lsls r2, #27 + orrs r1, r2 + lsls r0, #25 + bx lr +#endif +wrapper_func __aeabi_f2d +#if PICO_FLOAT_PROPAGATE_NANS + movs r3, #1 + lsls r3, #24 + lsls r2, r0, #1 + adds r2, r3 + bhi 1b +#endif + shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim + +float_wrapper_section sqrtf +wrapper_func_f1 sqrtf +#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED + // check for negative + asrs r1, r0, #23 + bmi 1f +#endif + table_tail_call SF_TABLE_FSQRT +#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED +1: + mvns r0, r1 + cmp r0, #255 + bne 2f + // -0 or -Denormal return -0 (0x80000000) + lsls r0, #31 + bx lr +2: + // return -Inf (0xff800000) + asrs r0, r1, #31 + lsls r0, #23 + bx lr +#endif + +float_wrapper_section cosf +// note we don't use _f1 since we do an infinity/nan check for outside of range +wrapper_func cosf + // rom version only works for -128 < angle < 128 + lsls r1, r0, #1 + lsrs r1, #24 + cmp r1, #127 + 7 + bge 1f +2: + table_tail_call SF_TABLE_FCOS +1: +#if PICO_FLOAT_PROPAGATE_NANS + // also check for infinites + cmp r1, #255 + bne 3f + // infinite to nan + movs r1, #1 + lsls r1, #22 + orrs r0, r1 + bx lr +3: +#endif + ldr r1, =0x40c90fdb // 2 * M_PI + push {lr} + bl remainderf + 
pop {r1} + mov lr, r1 + b 2b + +float_wrapper_section sinf +// note we don't use _f1 since we do an infinity/nan check for outside of range +wrapper_func sinf + // rom version only works for -128 < angle < 128 + lsls r1, r0, #1 + lsrs r1, #24 + cmp r1, #127 + 7 + bge 1f +2: + table_tail_call SF_TABLE_FSIN +1: +#if PICO_FLOAT_PROPAGATE_NANS + // also check for infinites + cmp r1, #255 + bne 3f + // infinite to nan + movs r1, #1 + lsls r1, #22 + orrs r0, r1 + bx lr +3: +#endif + ldr r1, =0x40c90fdb // 2 * M_PI + push {lr} + bl remainderf + pop {r1} + mov lr, r1 + b 2b + +float_wrapper_section sincosf +// note we don't use _f1 since we do an infinity/nan check for outside of range +wrapper_func sincosf + push {r1, r2, lr} + // rom version only works for -128 < angle < 128 + lsls r3, r0, #1 + lsrs r3, #24 + cmp r3, #127 + 7 + bge 3f +2: + ldr r3, =sf_table + ldr r3, [r3, #SF_TABLE_FSIN] + blx r3 + pop {r2, r3} + str r0, [r2] + str r1, [r3] + pop {pc} +#if PICO_FLOAT_PROPAGATE_NANS +.align 2 + pop {pc} +#endif +3: +#if PICO_FLOAT_PROPAGATE_NANS + // also check for infinites + cmp r3, #255 + bne 4f + // infinite to nan + movs r3, #1 + lsls r3, #22 + orrs r0, r3 + str r0, [r1] + str r0, [r2] + add sp, #12 + bx lr +4: +#endif + ldr r1, =0x40c90fdb // 2 * M_PI + push {lr} + bl remainderf + pop {r1} + mov lr, r1 + b 2b + +float_wrapper_section tanf +// note we don't use _f1 since we do an infinity/nan check for outside of range +wrapper_func tanf + // rom version only works for -128 < angle < 128 + lsls r1, r0, #1 + lsrs r1, #24 + cmp r1, #127 + 7 + bge ftan_out_of_range +ftan_in_range: +#if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty + ldr r2, =(SIO_BASE) + ldr r3, [r2, #SIO_DIV_CSR_OFFSET] + lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY + bcs ftan_save_state +#else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call + push {r4, lr} + 
mrs r4, PRIMASK + cpsid i + bl ftan_shim_call + msr PRIMASK, r4 + pop {r4, pc} +#endif +ftan_shim_call: + table_tail_call SF_TABLE_FTAN +#if !PICO_DIVIDER_DISABLE_INTERRUPTS +ftan_save_state: + save_div_state_and_lr + bl ftan_shim_call + ldr r2, =(SIO_BASE) + restore_div_state_and_return +#endif +ftan_out_of_range: +#if PICO_FLOAT_PROPAGATE_NANS + // also check for infinites + cmp r1, #255 + bne 3f + // infinite to nan + movs r1, #1 + lsls r1, #22 + orrs r0, r1 + bx lr +3: +#endif + ldr r1, =0x40c90fdb // 2 * M_PI + push {lr} + bl remainderf + pop {r1} + mov lr, r1 + b ftan_in_range + +float_wrapper_section atan2f +wrapper_func_f2 atan2f + shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim + +float_wrapper_section expf +wrapper_func_f1 expf + table_tail_call SF_TABLE_FEXP + +float_wrapper_section logf +wrapper_func_f1 logf + table_tail_call SF_TABLE_FLN diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_init_rom.c b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_init_rom.c new file mode 100644 index 00000000000..062bfe707e6 --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_init_rom.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include "pico/bootrom.h" +#include "pico/bootrom/sf_table.h" + +// NOTE THIS FUNCTION TABLE IS NOT PUBLIC OR NECESSARILY COMPLETE... 
+// IT IS ***NOT*** SAFE TO CALL THESE FUNCTION POINTERS FROM ARBITRARY CODE +uint32_t sf_table[SF_TABLE_V2_SIZE / 2]; +void __attribute__((weak)) *sf_clz_func; + +#if !(PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED) +static __attribute__((noreturn)) void missing_float_func_shim(void) { + panic(""); +} +#endif + +void __aeabi_float_init(void) { + int rom_version = rp2040_rom_version(); + void *rom_table = rom_data_lookup(rom_table_code('S', 'F')); +#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED + if (rom_version == 1) { + memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE); + extern void float_table_shim_on_use_helper(void); + // todo replace NDEBUG with a more exclusive assertion guard +#ifndef NDEBUG + if (*(uint16_t *)0x29ee != 0x0fc4 || // this is packx + *(uint16_t *)0x29c0 != 0x0dc2 || // this is upackx + *(uint16_t *)0x2b96 != 0xb5c0 || // this is cordic_vec + *(uint16_t *)0x2b18 != 0x2500 || // this is packretns + *(uint16_t *)0x2acc != 0xb510 || // this is float2fix + *(uint32_t *)0x2cfc != 0x6487ed51 // pi_q29 + ) { + panic(""); + } +#endif + + // this is a little tricky.. we only want to pull in a shim if the corresponding function + // is called. to that end we include a SVC instruction with the table offset as the call number + // followed by the shim function pointer inside the actual wrapper function. that way if the wrapper + // function is garbage collected, so is the shim function. + // + // float_table_shim_on_use_helper expects this SVC instruction in the calling code soon after the address + // pointed to by IP and patches the float_table entry with the real shim the first time the function is called. 
+ + for(uint i=SF_TABLE_V1_SIZE/4; i= 2) { + assert(*((uint8_t *)rom_table-2) * 4 >= SF_TABLE_V2_SIZE); + memcpy(&sf_table, rom_table, SF_TABLE_V2_SIZE); + } + sf_clz_func = rom_func_lookup(ROM_FUNC_CLZ32); +} diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_math.c b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_math.c new file mode 100644 index 00000000000..93eb01dff10 --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_math.c @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "pico/float.h" + +// opened a separate issue https://github.com/raspberrypi/pico-sdk/issues/166 to deal with these warnings if at all +GCC_Pragma("GCC diagnostic push") +GCC_Pragma("GCC diagnostic ignored \"-Wconversion\"") +GCC_Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") + +typedef uint32_t ui32; +typedef int32_t i32; + +#define FPINF ( HUGE_VALF) +#define FMINF (-HUGE_VALF) +#define NANF ((float)NAN) +#define PZERO (+0.0) +#define MZERO (-0.0) + +#define PI 3.14159265358979323846 +#define LOG2 0.69314718055994530941 +// Unfortunately in double precision ln(10) is very close to half-way between two representable numbers +#define LOG10 2.30258509299404568401 +#define LOG2E 1.44269504088896340737 +#define LOG10E 0.43429448190325182765 +#define ONETHIRD 0.33333333333333333333 + +#define PIf 3.14159265358979323846f +#define LOG2f 0.69314718055994530941f +#define LOG2Ef 1.44269504088896340737f +#define LOG10Ef 0.43429448190325182765f +#define ONETHIRDf 0.33333333333333333333f + +#define FUNPACK(x,e,m) e=((x)>>23)&0xff,m=((x)&0x007fffff)|0x00800000 +#define FUNPACKS(x,s,e,m) s=((x)>>31),FUNPACK((x),(e),(m)) + +typedef union { + float f; + ui32 ix; +} float_ui32; + +static inline float ui322float(ui32 ix) { + float_ui32 tmp; + tmp.ix = ix; + return tmp.f; +} + +static inline ui32 float2ui32(float f) { + float_ui32
tmp; + tmp.f = f; + return tmp.ix; +} + +#if PICO_FLOAT_PROPAGATE_NANS +static inline bool fisnan(float x) { + ui32 ix=float2ui32(x); + return ix * 2 > 0xff000000u; +} + +#define check_nan_f1(x) if (fisnan((x))) return (x) +#define check_nan_f2(x,y) if (fisnan((x))) return (x); else if (fisnan((y))) return (y); +#else +#define check_nan_f1(x) ((void)0) +#define check_nan_f2(x,y) ((void)0) +#endif + +static inline int fgetsignexp(float x) { + ui32 ix=float2ui32(x); + return (ix>>23)&0x1ff; +} + +static inline int fgetexp(float x) { + ui32 ix=float2ui32(x); + return (ix>>23)&0xff; +} + +static inline float fldexp(float x,int de) { + ui32 ix=float2ui32(x),iy; + int e; + e=fgetexp(x); + if(e==0||e==0xff) return x; + e+=de; + if(e<=0) iy=ix&0x80000000; // signed zero for underflow + else if(e>=0xff) iy=(ix&0x80000000)|0x7f800000ULL; // signed infinity on overflow + else iy=ix+((ui32)de<<23); + return ui322float(iy); +} + +float WRAPPER_FUNC(ldexpf)(float x, int de) { + check_nan_f1(x); + return fldexp(x, de); +} + +static inline float fcopysign(float x,float y) { + ui32 ix=float2ui32(x),iy=float2ui32(y); + ix=((ix&0x7fffffff)|(iy&0x80000000)); + return ui322float(ix); +} + +float WRAPPER_FUNC(copysignf)(float x, float y) { + check_nan_f2(x,y); + return fcopysign(x, y); +} + +static inline int fiszero(float x) { return fgetexp (x)==0; } +//static inline int fispzero(float x) { return fgetsignexp(x)==0; } +//static inline int fismzero(float x) { return fgetsignexp(x)==0x100; } +static inline int fisinf(float x) { return fgetexp (x)==0xff; } +static inline int fispinf(float x) { return fgetsignexp(x)==0xff; } +static inline int fisminf(float x) { return fgetsignexp(x)==0x1ff; } + +static inline int fisint(float x) { + ui32 ix=float2ui32(x),m; + int e=fgetexp(x); + if(e==0) return 1; // 0 is an integer + e-=0x7f; // remove exponent bias + if(e<0) return 0; // |x|<1 + e=23-e; // bit position in mantissa with significance 1 + if(e<=0) return 1; // |x| large, so must be an 
integer + m=(1<>e)&1; +} + +static inline int fisstrictneg(float x) { + ui32 ix=float2ui32(x); + if(fiszero(x)) return 0; + return ix>>31; +} + +static inline int fisneg(float x) { + ui32 ix=float2ui32(x); + return ix>>31; +} + +static inline float fneg(float x) { + ui32 ix=float2ui32(x); + ix^=0x80000000; + return ui322float(ix); +} + +static inline int fispo2(float x) { + ui32 ix=float2ui32(x); + if(fiszero(x)) return 0; + if(fisinf(x)) return 0; + ix&=0x007fffff; + return ix==0; +} + +static inline float fnan_or(float x) { +#if PICO_FLOAT_PROPAGATE_NANS + return NANF; +#else + return x; +#endif +} + +float WRAPPER_FUNC(truncf)(float x) { + check_nan_f1(x); + ui32 ix=float2ui32(x),m; + int e=fgetexp(x); + e-=0x7f; // remove exponent bias + if(e<0) { // |x|<1 + ix&=0x80000000; + return ui322float(ix); + } + e=23-e; // bit position in mantissa with significance 1 + if(e<=0) return x; // |x| large, so must be an integer + m=(1<=4+0x7f) { // |x|>=16? + if(!fisneg(x)) return 1; // 1 << exp 2x; avoid generating infinities later + else return -1; // 1 >> exp 2x + } + u=expf(fldexp(x,1)); + return (u-1.0f)/(u+1.0f); +} + +float WRAPPER_FUNC(asinhf)(float x) { + check_nan_f1(x); + int e; + e=fgetexp(x); + if(e>=16+0x7f) { // |x|>=2^16? + if(!fisneg(x)) return logf( x )+LOG2f; // 1/x^2 << 1 + else return fneg(logf(fneg(x))+LOG2f); // 1/x^2 << 1 + } + if(x>0) return (float)log(sqrt((double)x*(double)x+1.0)+(double)x); + else return fneg((float)log(sqrt((double)x*(double)x+1.0)-(double)x)); +} + +float WRAPPER_FUNC(acoshf)(float x) { + check_nan_f1(x); + int e; + if(fisneg(x)) x=fneg(x); + e=fgetexp(x); + if(e>=16+0x7f) return logf(x)+LOG2f; // |x|>=2^16? 
+ return (float)log(sqrt(((double)x+1.0)*((double)x-1.0))+(double)x); +} + +float WRAPPER_FUNC(atanhf)(float x) { + check_nan_f1(x); + return fldexp(logf((1.0f+x)/(1.0f-x)),-1); +} + +float WRAPPER_FUNC(exp2f)(float x) { check_nan_f1(x); return (float)exp((double)x*LOG2); } +float WRAPPER_FUNC(log2f)(float x) { check_nan_f1(x); return logf(x)*LOG2Ef; } +float WRAPPER_FUNC(exp10f)(float x) { check_nan_f1(x); return (float)exp((double)x*LOG10); } +float WRAPPER_FUNC(log10f)(float x) { check_nan_f1(x); return logf(x)*LOG10Ef; } + +float WRAPPER_FUNC(expm1f)(float x) { check_nan_f1(x); return (float)(exp((double)x)-1); } +float WRAPPER_FUNC(log1pf)(float x) { check_nan_f1(x); return (float)(log(1+(double)x)); } +float WRAPPER_FUNC(fmaf)(float x,float y,float z) { + check_nan_f2(x,y); + check_nan_f1(z); + return (float)((double)x*(double)y+(double)z); +} // has double rounding so not exact + +// general power, x>0 +static inline float fpow_1(float x,float y) { + return (float)exp(log((double)x)*(double)y); // using double-precision intermediates for better accuracy +} + +static float fpow_int2(float x,int y) { + float u; + if(y==1) return x; + u=fpow_int2(x,y/2); + u*=u; + if(y&1) u*=x; + return u; +} + +// for the case where x not zero or infinity, y small and not zero +static inline float fpowint_1(float x,int y) { + if(y<0) x=1.0f/x,y=-y; + return fpow_int2(x,y); +} + +// for the case where x not zero or infinity +static float fpowint_0(float x,int y) { + int e; + if(fisneg(x)) { + if(fisoddint(y)) return fneg(fpowint_0(fneg(x),y)); + else return fpowint_0(fneg(x),y); + } + if(fispo2(x)) { + e=fgetexp(x)-0x7f; + if(y>=256) y= 255; // avoid overflow + if(y<-256) y=-256; + y*=e; + return fldexp(1,y); + } + if(y==0) return 1; + if(y>=-32&&y<=32) return fpowint_1(x,y); + return fpow_1(x,y); +} + +float WRAPPER_FUNC(powintf)(float x,int y) { + GCC_Pragma("GCC diagnostic push") + GCC_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") + if(x==1.0f||y==0) return 1; + 
if(x==0.0f) { + if(y>0) { + if(y&1) return x; + else return 0; + } + if((y&1)) return fcopysign(FPINF,x); + return FPINF; + } + GCC_Pragma("GCC diagnostic pop") + check_nan_f1(x); + if(fispinf(x)) { + if(y<0) return 0; + else return FPINF; + } + if(fisminf(x)) { + if(y>0) { + if((y&1)) return FMINF; + else return FPINF; + } + if((y&1)) return MZERO; + else return PZERO; + } + return fpowint_0(x,y); +} + +// for the case where y is guaranteed a finite integer, x not zero or infinity +static float fpow_0(float x,float y) { + int e,p; + if(fisneg(x)) { + if(fisoddint(y)) return fneg(fpow_0(fneg(x),y)); + else return fpow_0(fneg(x),y); + } + p=(int)y; + if(fispo2(x)) { + e=fgetexp(x)-0x7f; + if(p>=256) p= 255; // avoid overflow + if(p<-256) p=-256; + p*=e; + return fldexp(1,p); + } + if(p==0) return 1; + if(p>=-32&&p<=32) return fpowint_1(x,p); + return fpow_1(x,y); +} + +float WRAPPER_FUNC(powf)(float x,float y) { + GCC_Like_Pragma("GCC diagnostic push") + GCC_Like_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") + if(x==1.0f||fiszero(y)) return 1; + check_nan_f2(x,y); + if(x==-1.0f&&fisinf(y)) return 1; + GCC_Like_Pragma("GCC diagnostic pop") + if(fiszero(x)) { + if(!fisneg(y)) { + if(fisoddint(y)) return x; + else return 0; + } + if(fisoddint(y)) return fcopysign(FPINF,x); + return FPINF; + } + if(fispinf(x)) { + if(fisneg(y)) return 0; + else return FPINF; + } + if(fisminf(x)) { + if(!fisneg(y)) { + if(fisoddint(y)) return FMINF; + else return FPINF; + } + if(fisoddint(y)) return MZERO; + else return PZERO; + } + if(fispinf(y)) { + if(fgetexp(x)<0x7f) return PZERO; + else return FPINF; + } + if(fisminf(y)) { + if(fgetexp(x)<0x7f) return FPINF; + else return PZERO; + } + if(fisint(y)) return fpow_0(x,y); + if(fisneg(x)) return FPINF; + return fpow_1(x,y); +} + +float WRAPPER_FUNC(hypotf)(float x,float y) { + check_nan_f2(x,y); + int ex,ey; + ex=fgetexp(x); ey=fgetexp(y); + if(ex>=0x7f+50||ey>=0x7f+50) { // overflow, or nearly so + x=fldexp(x,-70),y=fldexp(y,-70); 
+ return fldexp(sqrtf(x*x+y*y), 70); + } + else if(ex<=0x7f-50&&ey<=0x7f-50) { // underflow, or nearly so + x=fldexp(x, 70),y=fldexp(y, 70); + return fldexp(sqrtf(x*x+y*y),-70); + } + return sqrtf(x*x+y*y); +} + +float WRAPPER_FUNC(cbrtf)(float x) { + check_nan_f1(x); + int e; + if(fisneg(x)) return fneg(cbrtf(fneg(x))); + if(fiszero(x)) return fcopysign(PZERO,x); + e=fgetexp(x)-0x7f; + e=(e*0x5555+0x8000)>>16; // ~e/3, rounded + x=fldexp(x,-e*3); + x=expf(logf(x)*ONETHIRDf); + return fldexp(x,e); +} + +// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo +// 2^23<=|mx|,my<2^24, e>=0; 0<=result0) { + r=0xffffffffU/(ui32)(my>>7); // reciprocal estimate Q16 + } + while(e>0) { + s=e; if(s>12) s=12; // gain up to 12 bits on each iteration + q=(mx>>9)*r; // Q30 + q=((q>>(29-s))+1)>>1; // Q(s), rounded + mx=(mx<=my) mx-=my,quo++; // when e==0 mx can be nearly as big as 2my + if(mx>=my) mx-=my,quo++; + if(mx<0) mx+=my,quo--; + if(mx<0) mx+=my,quo--; + if(pquo) *pquo=quo; + return mx; +} + +float WRAPPER_FUNC(fmodf)(float x,float y) { + check_nan_f2(x,y); + ui32 ix=float2ui32(x),iy=float2ui32(y); + int sx,ex,ey; + i32 mx,my; + FUNPACKS(ix,sx,ex,mx); + FUNPACK(iy,ey,my); + if(ex==0xff) { + return fnan_or(FPINF); + } + if(ey==0) return FPINF; + if(ex==0) { + if(!fisneg(x)) return PZERO; + return MZERO; + } + if(ex|y|/2 + mx-=my+my; + ey--; + q=1; + } else { // x<-|y|/2 + mx=my+my-mx; + ey--; + q=-1; + } + } + else { + if(sx) mx=-mx; + mx=frem_0(mx,my,ex-ey,&q); + if(mx+mx>my || (mx+mx==my&&(q&1)) ) { // |x|>|y|/2, or equality and an odd quotient? 
+ mx-=my; + q++; + } + } + if(sy) q=-q; + if(quo) *quo=q; + return fix2float(mx,0x7f-ey+23); +} + +float WRAPPER_FUNC(dremf)(float x,float y) { check_nan_f2(x,y); return remquof(x,y,0); } + +float WRAPPER_FUNC(remainderf)(float x,float y) { check_nan_f2(x,y); return remquof(x,y,0); } + +GCC_Pragma("GCC diagnostic pop") // conversion \ No newline at end of file diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_none.S b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_none.S new file mode 100644 index 00000000000..5b1916a7e79 --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_none.S @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "pico/asm_helper.S" +#include "pico/bootrom/sf_table.h" + +pico_default_asm_setup + +wrapper_func __aeabi_fadd +wrapper_func __aeabi_fdiv +wrapper_func __aeabi_fmul +wrapper_func __aeabi_frsub +wrapper_func __aeabi_fsub +wrapper_func __aeabi_cfcmpeq +wrapper_func __aeabi_cfrcmple +wrapper_func __aeabi_cfcmple +wrapper_func __aeabi_fcmpeq +wrapper_func __aeabi_fcmplt +wrapper_func __aeabi_fcmple +wrapper_func __aeabi_fcmpge +wrapper_func __aeabi_fcmpgt +wrapper_func __aeabi_fcmpun +wrapper_func __aeabi_i2f +wrapper_func __aeabi_l2f +wrapper_func __aeabi_ui2f +wrapper_func __aeabi_ul2f +wrapper_func __aeabi_f2iz +wrapper_func __aeabi_f2lz +wrapper_func __aeabi_f2uiz +wrapper_func __aeabi_f2ulz +wrapper_func __aeabi_f2d +wrapper_func sqrtf +wrapper_func cosf +wrapper_func sinf +wrapper_func tanf +wrapper_func atan2f +wrapper_func expf +wrapper_func logf +wrapper_func ldexpf +wrapper_func copysignf +wrapper_func truncf +wrapper_func floorf +wrapper_func ceilf +wrapper_func roundf +wrapper_func sincosf +wrapper_func asinf +wrapper_func acosf +wrapper_func atanf +wrapper_func sinhf +wrapper_func coshf +wrapper_func tanhf +wrapper_func asinhf +wrapper_func acoshf +wrapper_func 
atanhf +wrapper_func exp2f +wrapper_func log2f +wrapper_func exp10f +wrapper_func log10f +wrapper_func powf +wrapper_func powintf +wrapper_func hypotf +wrapper_func cbrtf +wrapper_func fmodf +wrapper_func dremf +wrapper_func remainderf +wrapper_func remquof +wrapper_func expm1f +wrapper_func log1pf +wrapper_func fmaf + push {lr} // keep stack trace sane + ldr r0, =str + bl panic + +str: + .asciz "float support is disabled" \ No newline at end of file diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S new file mode 100644 index 00000000000..665a61038b7 --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "pico/asm_helper.S" + +#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED + +#ifndef PICO_FLOAT_IN_RAM +#define PICO_FLOAT_IN_RAM 0 +#endif + +pico_default_asm_setup + +.macro float_section name +// todo separate flag for shims? 
+#if PICO_FLOAT_IN_RAM +.section RAM_SECTION_NAME(\name), "ax" +#else +.section SECTION_NAME(\name), "ax" +#endif +.endm + +float_section float_table_shim_on_use_helper +regular_func float_table_shim_on_use_helper + push {r0-r2, lr} + mov r0, ip +#ifndef NDEBUG + // sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro + cmp r0, #0 + bne 1f + bkpt #0 +#endif +1: + ldrh r1, [r0] + lsrs r2, r1, #8 + adds r0, #2 + cmp r2, #0xdf + bne 1b + uxtb r1, r1 // r1 holds table offset + lsrs r2, r0, #2 + bcc 1f + // unaligned + ldrh r2, [r0, #0] + ldrh r0, [r0, #2] + lsls r0, #16 + orrs r0, r2 + b 2f +1: + ldr r0, [r0] +2: + ldr r2, =sf_table + str r0, [r2, r1] + str r0, [sp, #12] + pop {r0-r2, pc} + +float_section 642float_shims + +@ convert uint64 to float, rounding +regular_func uint642float_shim + movs r2,#0 @ fall through + +@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2 +regular_func ufix642float_shim + push {r4,r5,r14} + cmp r1,#0 + bpl 3f @ positive? we can use signed code + lsls r5,r1,#31 @ contribution to sticky bits + orrs r5,r0 + lsrs r0,r1,#1 + subs r2,#1 + b 4f + +@ convert int64 to float, rounding +regular_func int642float_shim + movs r2,#0 @ fall through + +@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2 +regular_func fix642float_shim + push {r4,r5,r14} +3: + movs r5,r0 + orrs r5,r1 + beq ret_pop45 @ zero? return +0 + asrs r5,r1,#31 @ sign bits +2: + asrs r4,r1,#24 @ try shifting 7 bits at a time + cmp r4,r5 + bne 1f @ next shift will overflow? 
+ lsls r1,#7 + lsrs r4,r0,#25 + orrs r1,r4 + lsls r0,#7 + adds r2,#7 + b 2b +1: + movs r5,r0 + movs r0,r1 +4: + negs r2,r2 + adds r2,#32+29 + + // bl packx + ldr r1, =0x29ef // packx + blx r1 +ret_pop45: + pop {r4,r5,r15} + +float_section fatan2_shim +regular_func fatan2_shim + push {r4,r5,r14} + + ldr r4, =0x29c1 // unpackx + mov ip, r4 +@ unpack arguments and shift one down to have common exponent + blx ip + mov r4,r0 + mov r0,r1 + mov r1,r4 + mov r4,r2 + mov r2,r3 + mov r3,r4 + blx ip + lsls r0,r0,#5 @ Q28 + lsls r1,r1,#5 @ Q28 + adds r4,r2,r3 @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise + asrs r4,#9 + adds r4,#1 + bmi 2f @ force y to 0 proper, so result will be zero + subs r4,r2,r3 @ calculate shift + bge 1f @ ex>=ey? + negs r4,r4 @ make shift positive + asrs r0,r4 + cmp r4,#28 + blo 3f + asrs r0,#31 + b 3f +1: + asrs r1,r4 + cmp r4,#28 + blo 3f +2: +@ here |x|>>|y| or both x and y are ±0 + cmp r0,#0 + bge 4f @ x positive, return signed 0 + ldr r3, =0x2cfc @ &pi_q29, circular coefficients + ldr r0,[r3] @ x negative, return +/- pi + asrs r1,#31 + eors r0,r1 + b 7f +4: + asrs r0,r1,#31 + b 7f +3: + movs r2,#0 @ initial angle + ldr r3, =0x2cfc @ &pi_q29, circular coefficients + cmp r0,#0 @ x negative + bge 5f + negs r0,r0 @ rotate to 1st/4th quadrants + negs r1,r1 + ldr r2,[r3] @ pi Q29 +5: + movs r4,#1 @ m=1 + ldr r5, =0x2b97 @ cordic_vec + blx r5 @ also produces magnitude (with scaling factor 1.646760119), which is discarded + mov r0,r2 @ result here is -pi/2..3pi/2 Q29 +@ asrs r2,#29 +@ subs r0,r2 + ldr r3, =0x2cfc @ &pi_q29, circular coefficients + ldr r2,[r3] @ pi Q29 + adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case + bcs 6f @ -pi/2..0? 
leave result as is + subs r4,r0,r2 @ pi: take off 2pi +6: + subs r0,#1 @ fiddle factor so atan2(0,1)==0 +7: + movs r2,#0 @ exponent for pack + ldr r3, =0x2b19 + bx r3 + +float_section float232_shims + +regular_func float2int_shim + movs r1,#0 @ fall through +regular_func float2fix_shim + // check for -0 or -denormal upfront + asrs r2, r0, #23 + adds r2, #128 + adds r2, #128 + beq 1f + // call original + ldr r2, =0x2acd + bx r2 + 1: + movs r0, #0 + bx lr + +float_section float264_shims + +regular_func float2int64_shim + movs r1,#0 @ and fall through +regular_func float2fix64_shim + push {r14} + bl f2fix + b d2f64_a + +regular_func float2uint64_shim + movs r1,#0 @ and fall through +regular_func float2ufix64_shim + asrs r3,r0,#23 @ negative? return 0 + bmi ret_dzero +@ and fall through + +@ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf +@ result clamped so that r3 can only be 0 or -1 +@ trashes r12 +.thumb_func +f2fix: + push {r4,r14} + mov r12,r1 + asrs r3,r0,#31 + lsls r0,#1 + lsrs r2,r0,#24 + beq 1f @ zero? + cmp r2,#0xff @ Inf? + beq 2f + subs r1,r2,#1 + subs r2,#0x7f @ remove exponent bias + lsls r1,#24 + subs r0,r1 @ insert implied 1 + eors r0,r3 + subs r0,r3 @ top two's complement + asrs r1,r0,#4 @ convert to double format + lsls r0,#28 + ldr r4, =d2fix_a + bx r4 +1: + movs r0,#0 + movs r1,r0 + movs r3,r0 + pop {r4,r15} +2: + mvns r0,r3 @ return max/min value + mvns r1,r3 + pop {r4,r15} + +ret_dzero: + movs r0,#0 + movs r1,#0 + bx r14 + +float_section d2fix_a_float + +.weak d2fix_a // weak because it exists in float shims too +.thumb_func +d2fix_a: +@ here +@ r0:r1 two's complement mantissa +@ r2 unbaised exponent +@ r3 mantissa sign extension bits + add r2,r12 @ exponent plus offset for required binary point position + subs r2,#52 @ required shift + bmi 1f @ shift down? +@ here a shift up by r2 places + cmp r2,#12 @ will clamp? 
+ bge 2f + movs r4,r0 + lsls r1,r2 + lsls r0,r2 + negs r2,r2 + adds r2,#32 @ complementary shift + lsrs r4,r2 + orrs r1,r4 + pop {r4,r15} +2: + mvns r0,r3 + mvns r1,r3 @ overflow: clamp to extreme fixed-point values + pop {r4,r15} +1: +@ here a shift down by -r2 places + adds r2,#32 + bmi 1f @ long shift? + mov r4,r1 + lsls r4,r2 + negs r2,r2 + adds r2,#32 @ complementary shift + asrs r1,r2 + lsrs r0,r2 + orrs r0,r4 + pop {r4,r15} +1: +@ here a long shift down + movs r0,r1 + asrs r1,#31 @ shift down 32 places + adds r2,#32 + bmi 1f @ very long shift? + negs r2,r2 + adds r2,#32 + asrs r0,r2 + pop {r4,r15} +1: + movs r0,r3 @ result very near zero: use sign extension bits + movs r1,r3 + pop {r4,r15} +d2f64_a: + asrs r2,r1,#31 + cmp r2,r3 + bne 1f @ sign extension bits fail to match sign of result? + pop {r15} +1: + mvns r0,r3 + movs r1,#1 + lsls r1,#31 + eors r1,r1,r0 @ generate extreme fixed-point values + pop {r15} + +float_section float2double_shim +regular_func float2double_shim + lsrs r3,r0,#31 @ sign bit + lsls r3,#31 + lsls r1,r0,#1 + lsrs r2,r1,#24 @ exponent + beq 1f @ zero? + cmp r2,#0xff @ Inf? + beq 2f + lsrs r1,#4 @ exponent and top 20 bits of mantissa + ldr r2,=(0x3ff-0x7f)<<20 @ difference in exponent offsets + adds r1,r2 + orrs r1,r3 + lsls r0,#29 @ bottom 3 bits of mantissa + bx r14 +1: + movs r1,r3 @ return signed zero +3: + movs r0,#0 + bx r14 +2: + ldr r1,=0x7ff00000 @ return signed infinity + adds r1,r3 + b 3b + +#endif \ No newline at end of file diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/include/pico/float.h b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/include/pico/float.h new file mode 100644 index 00000000000..0621ffcb7cd --- /dev/null +++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/include/pico/float.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _PICO_FLOAT_H +#define _PICO_FLOAT_H + +#include +#include +#include "pico.h" +#include "pico/bootrom/sf_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \file float.h +* \defgroup pico_float pico_float +* +* Optimized single-precision floating point functions +* +* (Replacement) optimized implementations are provided of the following compiler built-ins +* and math library functions: +* +* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf +* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf +* - powintf, sincosf (GNU extensions) +* +* The following additional optimized functions are also provided: +* +* - fix2float, ufix2float, fix642float, ufix642float, float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2int64, float2int_z, float2int64_z +*/ + +float fix2float(int32_t m, int e); +float ufix2float(uint32_t m, int e); +float fix642float(int64_t m, int e); +float ufix642float(uint64_t m, int e); + +// These methods round towards -Infinity. +int32_t float2fix(float f, int e); +uint32_t float2ufix(float f, int e); +int64_t float2fix64(float f, int e); +uint64_t float2ufix64(float f, int e); +int32_t float2int(float f); +int64_t float2int64(float f); + +// These methods round towards 0. 
+int32_t float2int_z(float f); +int64_t float2int64_z(float f); + +float exp10f(float x); +void sincosf(float x, float *sinx, float *cosx); +float powintf(float x, int y); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/binary_info.c b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/binary_info.c deleted file mode 100644 index aa67ac4c506..00000000000 --- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/binary_info.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. - * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#if !PICO_NO_BINARY_INFO && !PICO_NO_PROGRAM_INFO -#include "pico/binary_info.h" - -#if !PICO_NO_FLASH -#include "boot_stage2/config.h" -#endif - -// Note we put at most 4 pieces of binary info in the reset section because that's how much spare space we had -// (picked the most common ones)... if there is a link failure because of .reset section overflow then move -// more out. 
-#define reset_section_attr __attribute__((section(".reset"))) - -#if !PICO_NO_FLASH -#ifndef PICO_NO_BI_BINARY_SIZE -extern char __flash_binary_end; -bi_decl_with_attr(bi_binary_end((intptr_t)&__flash_binary_end), reset_section_attr) -#endif -#endif - -#if !PICO_NO_BI_PROGRAM_BUILD_DATE -#ifndef PICO_PROGRAM_BUILD_DATE -#define PICO_PROGRAM_BUILD_DATE __DATE__ -#endif -bi_decl_with_attr(bi_program_build_date_string(PICO_PROGRAM_BUILD_DATE), reset_section_attr); -#endif - -#if !PICO_NO_BI_PROGRAM_NAME -#if !defined(PICO_PROGRAM_NAME) && defined(PICO_TARGET_NAME) -#define PICO_PROGRAM_NAME PICO_TARGET_NAME -#endif -#ifdef PICO_PROGRAM_NAME -bi_decl_with_attr(bi_program_name(PICO_PROGRAM_NAME), reset_section_attr) -#endif -#endif - -#if !PICO_NO_BI_PICO_BOARD -#ifdef PICO_BOARD -bi_decl(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_PICO_BOARD, PICO_BOARD)) -#endif -#endif - -#if !PICO_NO_BI_SDK_VERSION -#ifdef PICO_SDK_VERSION_STRING -bi_decl_with_attr(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_SDK_VERSION, PICO_SDK_VERSION_STRING),reset_section_attr) -#endif -#endif - -#if !PICO_NO_BI_PROGRAM_VERSION_STRING -#ifdef PICO_PROGRAM_VERSION_STRING -bi_decl(bi_program_version_string(PICO_PROGRAM_VERSION_STRING)) -#endif -#endif - - -#if !PICO_NO_BI_PROGRAM_DESCRIPTION -#ifdef PICO_PROGRAM_DESCRIPTION -bi_decl(bi_program_description(PICO_PROGRAM_DESCRIPTION)) -#endif -#endif - -#if !PICO_NO_BI_PROGRAM_URL -#ifdef PICO_PROGRAM_URL -bi_decl(bi_program_url(PICO_PROGRAM_URL)) -#endif -#endif - -#if !PICO_NO_BI_BOOT_STAGE2_NAME -#ifdef PICO_BOOT_STAGE2_NAME -bi_decl(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_BOOT2_NAME, PICO_BOOT_STAGE2_NAME)) -#endif -#endif - -#if !PICO_NO_BI_BUILD_TYPE -#ifdef PICO_CMAKE_BUILD_TYPE -bi_decl(bi_program_build_attribute(PICO_CMAKE_BUILD_TYPE)) -#else -#ifndef NDEBUG -bi_decl(bi_program_build_attribute("Debug")) -#else -bi_decl(bi_program_build_attribute("Release")) -#endif -#endif - -#if 
PICO_DEOPTIMIZED_DEBUG -bi_decl(bi_program_build_attribute("All optimization disabled")) -#endif -#endif - -#endif diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/crt0.S b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/crt0.S deleted file mode 100644 index 4b600307e22..00000000000 --- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/crt0.S +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. - * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include "pico.h" -#include "pico/asm_helper.S" - -#include "hardware/regs/m0plus.h" -#include "hardware/regs/addressmap.h" -#include "hardware/regs/sio.h" -#include "pico/binary_info/defs.h" - -#ifdef NDEBUG -#ifndef COLLAPSE_IRQS -#define COLLAPSE_IRQS -#endif -#endif - -pico_default_asm_setup - -.section .vectors, "ax" -.align 2 - -.global __vectors, __VECTOR_TABLE -__VECTOR_TABLE: -__vectors: -.word __StackTop -.word _reset_handler -.word NMI_Handler -.word HardFault_Handler -.word isr_invalid // Reserved, should never fire -.word isr_invalid // Reserved, should never fire -.word isr_invalid // Reserved, should never fire -.word isr_invalid // Reserved, should never fire -.word isr_invalid // Reserved, should never fire -.word isr_invalid // Reserved, should never fire -.word isr_invalid // Reserved, should never fire -.word SVC_Handler -.word isr_invalid // Reserved, should never fire -.word isr_invalid // Reserved, should never fire -.word PendSV_Handler -.word SysTick_Handler -.word TIMER_IRQ_0_Handler -.word TIMER_IRQ_1_Handler -.word TIMER_IRQ_2_Handler -.word TIMER_IRQ_3_Handler -.word PWM_IRQ_WRAP_Handler -.word USBCTRL_IRQ_Handler -.word XIP_IRQ_Handler -.word PIO0_IRQ_0_Handler -.word PIO0_IRQ_1_Handler -.word PIO1_IRQ_0_Handler -.word TIMER_IRQ_1_Handler0 -.word TIMER_IRQ_1_Handler1 -.word TIMER_IRQ_1_Handler2 -.word TIMER_IRQ_1_Handler3 -.word TIMER_IRQ_1_Handler4 -.word TIMER_IRQ_1_Handler5 
-.word TIMER_IRQ_1_Handler6 -.word TIMER_IRQ_1_Handler7 -.word TIMER_IRQ_1_Handler8 -.word TIMER_IRQ_1_Handler9 -.word TIMER_IRQ_2_Handler0 -.word TIMER_IRQ_2_Handler1 -.word TIMER_IRQ_2_Handler2 -.word TIMER_IRQ_2_Handler3 -.word TIMER_IRQ_2_Handler4 -.word TIMER_IRQ_2_Handler5 -.word TIMER_IRQ_2_Handler6 -.word TIMER_IRQ_2_Handler7 -.word TIMER_IRQ_2_Handler8 -.word TIMER_IRQ_2_Handler9 -.word TIMER_IRQ_3_Handler0 -.word TIMER_IRQ_3_Handler1 - -// all default exception handlers do nothing, and we can check for them being set to our -// default values by seeing if they point to somewhere between __defaults_isrs_start and __default_isrs_end -.global __default_isrs_start -__default_isrs_start: - -// Declare a weak symbol for each ISR. -// By default, they will fall through to the undefined IRQ handler below (breakpoint), -// but can be overridden by C functions with correct name. - -.macro decl_isr_bkpt name -.weak \name -.type \name,%function -.thumb_func -\name: - bkpt #0 -.endm - -// these are separated out for clarity -decl_isr_bkpt isr_invalid -decl_isr_bkpt NMI_Handler -decl_isr_bkpt HardFault_Handler -decl_isr_bkpt SVC_Handler -decl_isr_bkpt PendSV_Handler -decl_isr_bkpt SysTick_Handler - -.global __default_isrs_end -__default_isrs_end: - -.macro decl_isr name -.weak \name -.type \name,%function -.thumb_func -\name: -.endm - -decl_isr TIMER_IRQ_0_Handler -decl_isr TIMER_IRQ_1_Handler -decl_isr TIMER_IRQ_2_Handler -decl_isr TIMER_IRQ_3_Handler -decl_isr PWM_IRQ_WRAP_Handler -decl_isr USBCTRL_IRQ_Handler -decl_isr XIP_IRQ_Handler -decl_isr PIO0_IRQ_0_Handler -decl_isr PIO0_IRQ_1_Handler -decl_isr PIO1_IRQ_0_Handler -decl_isr TIMER_IRQ_1_Handler0 -decl_isr TIMER_IRQ_1_Handler1 -decl_isr TIMER_IRQ_1_Handler2 -decl_isr TIMER_IRQ_1_Handler3 -decl_isr TIMER_IRQ_1_Handler4 -decl_isr TIMER_IRQ_1_Handler5 -decl_isr TIMER_IRQ_1_Handler6 -decl_isr TIMER_IRQ_1_Handler7 -decl_isr TIMER_IRQ_1_Handler8 -decl_isr TIMER_IRQ_1_Handler9 -decl_isr TIMER_IRQ_2_Handler0 -decl_isr 
TIMER_IRQ_2_Handler1 -decl_isr TIMER_IRQ_2_Handler2 -decl_isr TIMER_IRQ_2_Handler3 -decl_isr TIMER_IRQ_2_Handler4 -decl_isr TIMER_IRQ_2_Handler5 -decl_isr TIMER_IRQ_2_Handler6 -decl_isr TIMER_IRQ_2_Handler7 -decl_isr TIMER_IRQ_2_Handler8 -decl_isr TIMER_IRQ_2_Handler9 -decl_isr TIMER_IRQ_3_Handler0 -decl_isr TIMER_IRQ_3_Handler1 - -// All unhandled USER IRQs fall through to here -.global __unhandled_user_irq -.thumb_func -__unhandled_user_irq: - mrs r0, ipsr - subs r0, #16 -.global unhandled_user_irq_num_in_r0 -unhandled_user_irq_num_in_r0: - bkpt #0 - -// ---------------------------------------------------------------------------- - -.section .binary_info_header, "a" - -// Header must be in first 256 bytes of main image (i.e. excluding flash boot2). -// For flash builds we put it immediately after vector table; for NO_FLASH the -// vectors are at a +0x100 offset because the bootrom enters RAM images directly -// at their lowest address, so we put the header in the VTOR alignment hole. - -#if !PICO_NO_BINARY_INFO -binary_info_header: -.word BINARY_INFO_MARKER_START -.word __binary_info_start -.word __binary_info_end -.word data_cpy_table // we may need to decode pointers that are in RAM at runtime. -.word BINARY_INFO_MARKER_END -#endif - -// ---------------------------------------------------------------------------- - -.section .reset, "ax" - -// On flash builds, the vector table comes first in the image (conventional). -// On NO_FLASH builds, the reset handler section comes first, as the entry -// point is at offset 0 (fixed due to bootrom), and VTOR is highly-aligned. 
-// Image is entered in various ways: -// -// - NO_FLASH builds are entered from beginning by UF2 bootloader -// -// - Flash builds vector through the table into _reset_handler from boot2 -// -// - Either type can be entered via _entry_point by the debugger, and flash builds -// must then be sent back round the boot sequence to properly initialise flash - -// ELF entry point: -.type _entry_point,%function -.thumb_func -.global _entry_point -_entry_point: - -#if PICO_NO_FLASH - // Vector through our own table (SP, VTOR will not have been set up at - // this point). Same path for debugger entry and bootloader entry. - ldr r0, =__vectors -#else - // Debugger tried to run code after loading, so SSI is in 03h-only mode. - // Go back through bootrom + boot2 to properly initialise flash. - movs r0, #0 -#endif - ldr r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET) - str r0, [r1] - ldmia r0!, {r1, r2} - msr msp, r1 - bx r2 - -// Reset handler: -// - initialises .data -// - clears .bss -// - calls runtime_init -// - calls main -// - calls exit (which should eventually hang the processor via _exit) - -.type _reset_handler,%function -.thumb_func -_reset_handler: - // Only core 0 should run the C runtime startup code; core 1 is normally - // sleeping in the bootrom at this point but check to be sure - ldr r0, =(SIO_BASE + SIO_CPUID_OFFSET) - ldr r0, [r0] - cmp r0, #0 - bne hold_non_core0_in_bootrom - - // In a NO_FLASH binary, don't perform .data copy, since it's loaded - // in-place by the SRAM load. 
Still need to clear .bss -#if !PICO_NO_FLASH - adr r4, data_cpy_table - - // assume there is at least one entry -1: - ldmia r4!, {r1-r3} - cmp r1, #0 - beq 2f - bl data_cpy - b 1b -2: -#endif - - // Zero out the BSS - ldr r1, =__bss_start__ - ldr r2, =__bss_end__ - movs r0, #0 - b bss_fill_test -bss_fill_loop: - stm r1!, {r0} -bss_fill_test: - cmp r1, r2 - bne bss_fill_loop - -platform_entry: // symbol for stack traces - // Use 32-bit jumps, in case these symbols are moved out of branch range - // (e.g. if main is in SRAM and crt0 in flash) - ldr r1, =runtime_init - blx r1 - ldr r1, =main - blx r1 - ldr r1, =exit - blx r1 - // exit should not return. If it does, hang the core. - // (fall thru into our hang _exit impl -1: // separate label because _exit can be moved out of branch range - bkpt #0 - b 1b - -#if !PICO_NO_FLASH -data_cpy_loop: - ldm r1!, {r0} - stm r2!, {r0} -data_cpy: - cmp r2, r3 - blo data_cpy_loop - bx lr -#endif - -// Note the data copy table is still included for NO_FLASH builds, even though -// we skip the copy, because it is listed in binary info - -.align 2 -data_cpy_table: -#if PICO_COPY_TO_RAM -.word __ram_text_source__ -.word __ram_text_start__ -.word __ram_text_end__ -#endif -.word __etext -.word __data_start__ -.word __data_end__ - -.word __scratch_x_source__ -.word __scratch_x_start__ -.word __scratch_x_end__ - -.word __scratch_y_source__ -.word __scratch_y_start__ -.word __scratch_y_end__ - -.word 0 // null terminator - -// ---------------------------------------------------------------------------- -// Provide safe defaults for _exit and runtime_init -// Full implementations usually provided by platform.c - -.weak runtime_init -.type runtime_init,%function -.thumb_func -runtime_init: - bx lr - -// ---------------------------------------------------------------------------- -// If core 1 somehow gets into crt0 due to a spectacular VTOR mishap, we need to -// catch it and send back to the sleep-and-launch code in the bootrom. 
Shouldn't -// happen (it should sleep in the ROM until given an entry point via the -// cross-core FIFOs) but it's good to be defensive. - -hold_non_core0_in_bootrom: - ldr r0, = 'W' | ('V' << 8) - bl rom_func_lookup - bx r0 - -// ---------------------------------------------------------------------------- -// Stack/heap dummies to set size - -// Prior to SDK 1.5.1 these were `.section .stack` without the `, "a"`... Clang linker gives a warning about this, -// however setting it explicitly to `, "a"` makes GCC *now* discard the section unless it is also KEEP. This -// seems like very surprising behavior! -// -// Strictly the most correct thing to do (as .stack and .heap are unreferenced) is to mark them as "a", and also KEEP, which -// works correctly for both GCC and Clang, however doing so may break anyone who already has custom linker scripts without -// the KEEP. Therefore we will only add the "a" on Clang, but will also use KEEP to our own linker scripts. - -.macro spacer_section name -#if PICO_ASSEMBLER_IS_CLANG -.section \name, "a" -#else -.section \name -#endif -.endm - -spacer_section .stack -// align to allow for memory protection (although this alignment is pretty much ignored by linker script) -.p2align 5 - .equ StackSize, PICO_STACK_SIZE -.space StackSize - -spacer_section .heap -.p2align 2 - .equ HeapSize, PICO_HEAP_SIZE -.space HeapSize diff --git a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/doc.h b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/doc.h deleted file mode 100644 index d8ce3d49007..00000000000 --- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/doc.h +++ /dev/null @@ -1,10 +0,0 @@ -/** - * \defgroup pico_standard_link pico_standard_link - * \brief Standard link step providing the basics for creating a runnable binary - * - * This includes - * - C runtime initialization - * - Linker scripts for 'default', 'no_flash', 'blocked_ram' and 'copy_to_ram' binaries - * - 
'Binary Information' support - * - Linker option control - */ diff --git a/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py b/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py index 660608f94d6..e8ec49f3e90 100644 --- a/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py +++ b/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py @@ -65,6 +65,7 @@ pathlib.Path("pico_sdk_version.cmake"), pathlib.Path("src") / "rp2_common" / "hardware_base", pathlib.Path("src") / "rp2_common" / "hardware_adc", + pathlib.Path("src") / "rp2_common" / "hardware_divider", pathlib.Path("src") / "rp2_common" / "hardware_resets", pathlib.Path("src") / "rp2_common" / "hardware_pwm", pathlib.Path("src") / "rp2_common" / "hardware_flash", @@ -83,6 +84,7 @@ pathlib.Path("src") / "rp2_common" / "hardware_rtc", pathlib.Path("src") / "rp2_common" / "pico_bootrom", pathlib.Path("src") / "rp2_common" / "pico_platform", + pathlib.Path("src") / "rp2_common" / "pico_float", pathlib.Path("src") / "rp2_common" / "cmsis" / "stub", pathlib.Path("src") / "common" / "pico_time", pathlib.Path("src") / "common" / "pico_sync", From 3097511e9d21abccb4076b4334cc5711bd003bfb Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Fri, 5 Jan 2024 19:10:58 -0800 Subject: [PATCH 2/2] Add license header --- targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py b/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py index e8ec49f3e90..6eb9c0dbb4e 100644 --- a/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py +++ b/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py @@ -1,3 +1,6 @@ +# Copyright (c) 2024 ARM Limited. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + """ This script can be used to reimport a newer version of the RPi Pico SDK.