Skip to content

Commit

Permalink
Merge #18620
Browse files Browse the repository at this point in the history
18620: core: add core_mutex_debug to aid debugging deadlocks r=maribu a=maribu

### Contribution description

Adding `USEMODULE += core_mutex_debug` to your `Makefile` results in
on log messages such as

    [mutex] waiting for thread 1 (pc = 0x800024d)

being added whenever `mutex_lock()` blocks. This makes tracing down
deadlocks easier.

### Testing procedure

Run e.g.

```sh
USEMODULE=core_mutex_debug BOARD=nucleo-f767zi make -C tests/mutex_cancel flash test
```

which should provide output such as

```
Welcome to pyterm!
Type '/exit' to exit.
READY
s
[mutex] waiting for thread 1 (pc = 0x8000f35)
START
main(): This is RIOT! (Version: 2022.10-devel-841-g5cc02-core/mutex/debug)
Test Application for mutex_cancel / mutex_lock_cancelable
=========================================================

Test without cancellation: OK
Test early cancellation: OK
Verify no side effects on subsequent calls: [mutex] waiting for thread 1 (pc = 0x800024d)
OK
Test late cancellation: [mutex] waiting for thread 1 (pc = 0x0)
OK
TEST PASSED
```

```sh
$ arm-none-eabi-addr2line -a 0x800024d -e tests/mutex_cancel/bin/nucleo-f767zi/tests_mutex_cancel.elf 
0x0800024d
/home/maribu/Repos/software/RIOT/tests/mutex_cancel/main.c:51
```

### Issues/PRs references

Depends on and includes #18619

Co-authored-by: Marian Buschsieweke <[email protected]>
  • Loading branch information
bors[bot] and maribu authored Apr 25, 2023
2 parents 8b58e55 + 3e86e39 commit 9716808
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 8 deletions.
3 changes: 3 additions & 0 deletions core/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ config MODULE_CORE_MSG_BUS
help
Messaging Bus API for inter process message broadcast.

config MODULE_CORE_MUTEX_DEBUG
bool "Aid debugging deadlocks by printing on whom mutex_lock() is waiting"

config MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
bool "Use priority inheritance to mitigate priority inversion for mutexes"

Expand Down
35 changes: 34 additions & 1 deletion core/include/mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,25 @@
* `MUTEX_LOCK`.
* - The scheduler is run, so that if the unblocked waiting thread can
* run now, in case it has a higher priority than the running thread.
*
* Debugging deadlocks
* -------------------
*
* The module `core_mutex_debug` can be used to print on whom `mutex_lock()`
* is waiting. This information includes the thread ID of the owner and the
* program counter (PC) from where `mutex_lock()` was called. Note that the
* information is only valid if:
*
* - The mutex was locked by a thread, and not e.g. by `MUTEX_INIT_LOCKED`
* - The function `cpu_get_caller_pc()` is implemented for the target
* architecture. (The thread ID will remain valid, though.)
* - The caller PC is briefly 0 when the current owner passes over ownership
* to the next thread, but that thread didn't get CPU time yet to write its
* PC into the data structure. Even worse, on architectures where an aligned
* function-pointer-sized write is not atomic, the value may briefly be
* bogus. Chances are close to zero this ever hits and since this only
* effects debug output, the ostrich algorithm was chosen here.
*
* @{
*
* @file
Expand All @@ -112,6 +131,7 @@
#include <stdint.h>
#include <stdbool.h>

#include "architecture.h"
#include "kernel_defines.h"
#include "list.h"
#include "thread.h"
Expand All @@ -130,7 +150,8 @@ typedef struct {
* @internal
*/
list_node_t queue;
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \
|| defined(MODULE_CORE_MUTEX_DEBUG)
/**
* @brief The current owner of the mutex or `NULL`
* @note Only available if module core_mutex_priority_inheritance
Expand All @@ -141,6 +162,18 @@ typedef struct {
* this will have the value of `NULL`.
*/
kernel_pid_t owner;
#endif
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_DEBUG)
/**
* @brief Program counter of the call to @ref mutex_lock that most
* recently acquired this mutex
*
* This is used when the module `core_mutex_debug` is used to debug
* deadlocks and is non-existing otherwise
*/
uinttxtptr_t owner_calling_pc;
#endif
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
/**
* @brief Original priority of the owner
* @note Only available if module core_mutex_priority_inheritance
Expand Down
49 changes: 42 additions & 7 deletions core/mutex.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,18 @@
* function into both @ref mutex_lock and @ref mutex_lock_cancelable is,
* therefore, beneficial for the majority of applications.
*/
static inline __attribute__((always_inline)) void _block(mutex_t *mutex,
unsigned irq_state)
static inline __attribute__((always_inline))
void _block(mutex_t *mutex,
unsigned irq_state,
uinttxtptr_t pc)
{
/* pc is only used when MODULE_CORE_MUTEX_DEBUG */
(void)pc;
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
printf("[mutex] waiting for thread %" PRIkernel_pid " (pc = 0x%" PRIxTXTPTR
")\n",
mutex->owner, mutex->owner_calling_pc);
#endif
thread_t *me = thread_get_active();

/* Fail visibly even if a blocking action is called from somewhere where
Expand Down Expand Up @@ -80,10 +89,17 @@ static inline __attribute__((always_inline)) void _block(mutex_t *mutex,
irq_restore(irq_state);
thread_yield_higher();
/* We were woken up by scheduler. Waker removed us from queue. */
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
mutex->owner_calling_pc = pc;
#endif
}

bool mutex_lock_internal(mutex_t *mutex, bool block)
{
uinttxtptr_t pc = 0;
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
pc = cpu_get_caller_pc();
#endif
unsigned irq_state = irq_disable();

DEBUG("PID[%" PRIkernel_pid "] mutex_lock_internal(block=%u).\n",
Expand All @@ -92,9 +108,15 @@ bool mutex_lock_internal(mutex_t *mutex, bool block)
if (mutex->queue.next == NULL) {
/* mutex is unlocked. */
mutex->queue.next = MUTEX_LOCKED;
#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \
|| IS_USED(MODULE_CORE_MUTEX_DEBUG)
thread_t *me = thread_get_active();
mutex->owner = me->pid;
#endif
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
mutex->owner_calling_pc = pc;
#endif
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
mutex->owner_original_priority = me->priority;
#endif
DEBUG("PID[%" PRIkernel_pid "] mutex_lock(): early out.\n",
Expand All @@ -106,14 +128,18 @@ bool mutex_lock_internal(mutex_t *mutex, bool block)
irq_restore(irq_state);
return false;
}
_block(mutex, irq_state);
_block(mutex, irq_state, pc);
}

return true;
}

int mutex_lock_cancelable(mutex_cancel_t *mc)
{
uinttxtptr_t pc = 0;
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
pc = cpu_get_caller_pc();
#endif
unsigned irq_state = irq_disable();

DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable()\n",
Expand All @@ -131,9 +157,15 @@ int mutex_lock_cancelable(mutex_cancel_t *mc)
if (mutex->queue.next == NULL) {
/* mutex is unlocked. */
mutex->queue.next = MUTEX_LOCKED;
#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \
|| IS_USED(MODULE_CORE_MUTEX_DEBUG)
thread_t *me = thread_get_active();
mutex->owner = me->pid;
#endif
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
mutex->owner_calling_pc = pc;
#endif
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
mutex->owner_original_priority = me->priority;
#endif
DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable() early out.\n",
Expand All @@ -142,7 +174,7 @@ int mutex_lock_cancelable(mutex_cancel_t *mc)
return 0;
}
else {
_block(mutex, irq_state);
_block(mutex, irq_state, pc);
if (mc->cancelled) {
DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable() "
"cancelled.\n", thread_getpid());
Expand Down Expand Up @@ -185,7 +217,7 @@ void mutex_unlock(mutex_t *mutex)

uint16_t process_priority = process->priority;

#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
thread_t *owner = thread_get(mutex->owner);
if ((owner) && (owner->priority != mutex->owner_original_priority)) {
DEBUG("PID[%" PRIkernel_pid "] prio %u --> %u\n",
Expand All @@ -194,6 +226,9 @@ void mutex_unlock(mutex_t *mutex)
sched_change_priority(owner, mutex->owner_original_priority);
}
#endif
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
mutex->owner_calling_pc = 0;
#endif

irq_restore(irqstate);
sched_switch(process_priority);
Expand Down

0 comments on commit 9716808

Please sign in to comment.