Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable RPi Pico's optimized ROM floating point routines #202

Merged
merged 2 commits into from
Jan 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions targets/TARGET_RASPBERRYPI/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,83 @@ file(GENERATE
CONTENT "${header_content}"
)

# Request that the linker wrap FUNCNAME for anything that links against TARGET.
# The option is added to TARGET's INTERFACE link options using CMake's portable
# "LINKER:" prefix, which the GCC driver receives as -Wl,--wrap=FUNCNAME.
#   TARGET   - existing target whose INTERFACE link options are extended
#   FUNCNAME - symbol name to pass to --wrap
function(pico_wrap_function TARGET FUNCNAME)
    target_link_options(${TARGET}
        INTERFACE
            "LINKER:--wrap=${FUNCNAME}"
    )
endfunction()

# Add a --wrap link option to TARGET for every single-precision float symbol
# listed below, by calling pico_wrap_function() once per symbol, in list order.
# The symbol list is copied from src/rp2_common/pico_float/CMakeLists.txt.
#   TARGET - target passed through unchanged to pico_wrap_function()
function(wrap_float_functions TARGET)
    set(float_symbols
        # __aeabi_* single-precision helpers
        __aeabi_fadd
        __aeabi_fdiv
        __aeabi_fmul
        __aeabi_frsub
        __aeabi_fsub
        __aeabi_cfcmpeq
        __aeabi_cfrcmple
        __aeabi_cfcmple
        __aeabi_fcmpeq
        __aeabi_fcmplt
        __aeabi_fcmple
        __aeabi_fcmpge
        __aeabi_fcmpgt
        __aeabi_fcmpun
        __aeabi_i2f
        __aeabi_l2f
        __aeabi_ui2f
        __aeabi_ul2f
        __aeabi_f2iz
        __aeabi_f2lz
        __aeabi_f2uiz
        __aeabi_f2ulz
        __aeabi_f2d
        # first group of float math functions
        sqrtf
        cosf
        sinf
        tanf
        atan2f
        expf
        logf
        # second group of float math functions
        ldexpf
        copysignf
        truncf
        floorf
        ceilf
        roundf
        sincosf # gnu
        asinf
        acosf
        atanf
        sinhf
        coshf
        tanhf
        asinhf
        acoshf
        atanhf
        exp2f
        log2f
        exp10f
        log10f
        powf
        powintf # gnu
        hypotf
        cbrtf
        fmodf
        dremf
        remainderf
        remquof
        expm1f
        log1pf
        fmaf
    )

    foreach(float_symbol IN LISTS float_symbols)
        pico_wrap_function(${TARGET} ${float_symbol})
    endforeach()
endfunction()

# Now, add includes and headers from the Pico SDK
target_include_directories(mbed-raspberrypi
INTERFACE
.
pico-sdk/src/rp2_common/hardware_adc/include
pico-sdk/src/rp2_common/hardware_divider/include
pico-sdk/src/rp2_common/hardware_gpio/include
pico-sdk/src/rp2_common/hardware_resets/include
pico-sdk/src/rp2_common/hardware_pwm/include
Expand All @@ -54,6 +126,7 @@ target_include_directories(mbed-raspberrypi
pico-sdk/src/rp2_common/pico_platform/include
pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/include/
pico-sdk/src/rp2_common/pico_bootrom/include
pico-sdk/src/rp2_common/pico_float/include
pico-sdk/src/rp2_common/hardware_claim/include
pico-sdk/src/common/pico_sync/include
pico-sdk/src/common/pico_time/include
Expand Down Expand Up @@ -89,6 +162,11 @@ target_sources(mbed-raspberrypi
pico-sdk/src/common/pico_time/time.c
pico-sdk/src/common/pico_sync/lock_core.c
pico-sdk/src/rp2_common/cmsis/stub/CMSIS/Device/RaspberryPi/RP2040/Source/system_RP2040.c
pico-sdk/src/rp2_common/pico_float/float_aeabi.S
pico-sdk/src/rp2_common/pico_float/float_init_rom.c
pico-sdk/src/rp2_common/pico_float/float_math.c
pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S
pico-sdk/src/rp2_common/hardware_divider/divider.S
)

target_compile_definitions(mbed-raspberrypi
Expand All @@ -110,4 +188,7 @@ target_sources(mbed-rp2040
pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/rp2040_usb_device_enumeration.c
)

# Enable usage of the RPi Pico optimized floating point routines.
# wrap_float_functions() adds one "LINKER:--wrap=<symbol>" INTERFACE link option to
# mbed-rp2040 for each float symbol in its list.
wrap_float_functions(mbed-rp2040)

# Process TARGET_RP2040/CMakeLists.txt. EXCLUDE_FROM_ALL keeps the targets defined there
# out of the default "all" build unless another target depends on them.
add_subdirectory(TARGET_RP2040 EXCLUDE_FROM_ALL)
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include "pico/asm_helper.S"
#include "hardware/regs/addressmap.h"
#include "hardware/regs/sio.h"

pico_default_asm_setup

// hw_divider_divmod_s32: start a signed divide on the hardware divider.
// In:  r0 is written to the signed dividend register, r1 to the signed divisor register.
// Out: produced by the shared tail hw_divider_divmod_return (r0 = quotient, r1 = remainder).
// Uses r3 to hold SIO_BASE; the shared tail relies on r3 still holding it.
// tag::hw_div_s32[]
regular_func_with_section hw_divider_divmod_s32
ldr r3, =(SIO_BASE)
str r0, [r3, #SIO_DIV_SDIVIDEND_OFFSET]
str r1, [r3, #SIO_DIV_SDIVISOR_OFFSET]
b hw_divider_divmod_return
// end::hw_div_s32[]

// hw_divider_divmod_u32: start an unsigned divide on the hardware divider.
// In:  r0 is written to the unsigned dividend register, r1 to the unsigned divisor register.
// Out: produced by the shared tail hw_divider_divmod_return (r0 = quotient, r1 = remainder).
// Uses r3 to hold SIO_BASE; the shared tail relies on r3 still holding it.
// tag::hw_div_u32[]
regular_func_with_section hw_divider_divmod_u32
ldr r3, =(SIO_BASE)
str r0, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
str r1, [r3, #SIO_DIV_UDIVISOR_OFFSET]
b hw_divider_divmod_return
// end::hw_div_u32[]

// Common delay and return section for s32 and u32
// Entered by a branch from hw_divider_divmod_s32 / hw_divider_divmod_u32 with r3 = SIO_BASE
// and the dividend/divisor already written. Returns with r0 = quotient, r1 = remainder.
.section .text.hw_divider_divmod_return
hw_divider_divmod_return:
// Branching here is 2 cycles, delay another 6
// The three branches below each jump to the very next instruction; they exist only as delay.
b 1f
1: b 1f
1: b 1f
1: // return 64 bit value so we can efficiently return both (note quotient must be read last)
// Remainder is loaded first (into r1), then quotient (into r0), per the note above.
ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
ldr r0, [r3, #SIO_DIV_QUOTIENT_OFFSET]
bx lr

// hw_divider_save_state: copy the divider registers to memory.
// In:  r0 = address of the destination buffer.
// Stores four consecutive words at [r0] in this order: dividend, divisor, remainder, quotient.
// r0 is post-incremented by the stores (stmia r0!); r1, r2 and r3 are overwritten.
regular_func_with_section hw_divider_save_state
ldr r3, =SIO_BASE
ldr r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
ldr r2, [r3, #SIO_DIV_UDIVISOR_OFFSET]
stmia r0!, {r1-r2}
// The 8 cycles needed to guarantee that the result is ready is ensured by the preceding
// code of 7 cycles together with any branch to it taking at least 2 cycles.
// Remainder is read before quotient, the same order used by hw_divider_divmod_return.
ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
ldr r2, [r3, #SIO_DIV_QUOTIENT_OFFSET]
stmia r0!, {r1-r2}
bx lr

// hw_divider_restore_state: write previously saved values back into the divider registers.
// In:  r0 = address of a buffer holding four consecutive words in the order
//      dividend, divisor, remainder, quotient (the layout hw_divider_save_state stores).
// r0 is post-incremented by the loads (ldmia r0!); r1, r2 and r3 are overwritten.
regular_func_with_section hw_divider_restore_state
ldr r3, =SIO_BASE
ldmia r0!, {r1-r2}
str r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
str r2, [r3, #SIO_DIV_UDIVISOR_OFFSET]
ldmia r0!, {r1-r2}
// NOTE(review): remainder is written before quotient; presumably the write order matters to
// the hardware in the same way the read order does — confirm against the RP2040 datasheet.
str r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
str r2, [r3, #SIO_DIV_QUOTIENT_OFFSET]
bx lr
Loading