Fix and re-enable dynamic scheduler scaling

Change dynamic scheduler scaling implementation in order to resolve the hangs encountered in #2451. The previous implementation assumed that signalling to wake a thread was a reliable operation. Apparently, that's not necessarily true (see https://en.wikipedia.org/wiki/Spurious_wakeup and https://askldjd.com/2010/04/24/the-lost-wakeup-problem/). Seeing as we couldn't find any other explanation for why the previous implementation was experiencing hangs, I've assumed it is either because of lost wake ups or spurious wake ups and redesigned the logic accordingly. Now, when a thread is about to suspend, it will decrement the `active_scheduler_count` and then suspend. When it wakes up, it will check to see if the `active_scheduler_count` is at least as big as its `index`. If the `active_scheduler_count` isn't big enough, the thread will suspend itself again immediately. If it is big enough, it will resume. Threads no longer modify `active_scheduler_count` when they wake up. `active_scheduler_count` must now be modified by the thread that is waking up another thread prior to sending the wake up notification. Additionally, since we're now assuming that wake up signals can be lost, we now send multiple wake up notifications just in case. While this is somewhat wasteful, it is better than being in a situation where some threads aren't woken up at all (i.e. a hang). Additionally, only use `scheduler_count_changing` for `signals` implementation of dynamic scheduler scaling. `pthreads` implementation now uses a mutex (`sched_mut`) in its place. We also now change logic to only unlock mutex in `pthreads` implementation once threads have been woken to avoid potential lost wake ups. This isn't an issue for the `signals` implementation and the unlocking of `scheduler_count_changing` can remain where it is prior to threads being woken up. This commit also splits out scheduler block/unblock message handling logic into their own functions (this is so that sched 0 can call those functions directly instead of sending messages to itself). This commit also includes a change inspired by #2474. Now, *all* scheduler threads can suspend as long as there is at least one noisy actor registered with the ASIO subsystem. If there are no noisy actors registered with the ASIO subsystem then scheduler 0 is not allowed to suspend itself because it is reponsible for quiescence detection. Lastly, this commit adds logic to allow a scheduler thread to suspend even if it has already sent a scheduler block message so that we can now suspend scheduler threads in most scenarios.
ponylang · Jan 20, 2018 · fc80968 · fc80968
1 parent 42b5217
commit fc80968
Show file tree

Hide file tree

Showing 6 changed files with 364 additions and 145 deletions.
diff --git a/src/common/threads.h b/src/common/threads.h
@@ -57,7 +57,11 @@ void ponyint_thread_detach(pony_thread_id_t thread);
 
 pony_thread_id_t ponyint_thread_self();
 
+#if defined(USE_SCHEDULER_SCALING_PTHREADS)
+void ponyint_thread_suspend(pony_signal_event_t signal, pthread_mutex_t* mut);
+#else
 void ponyint_thread_suspend(pony_signal_event_t signal);
+#endif
 
 int ponyint_thread_wake(pony_thread_id_t thread, pony_signal_event_t signal);
 

diff --git a/src/libponyc/options/options.c b/src/libponyc/options/options.c
@@ -209,7 +209,7 @@ static void usage(void)
     "  --ponythreads    Use N scheduler threads. Defaults to the number of\n"
     "                   cores (not hyperthreads) available.\n"
     "  --ponyminthreads Minimum number of active scheduler threads allowed.\n"
-    "                   Defaults to the number of '--ponythreads'.\n"
+    "                   Defaults to 0.\n"
     "  --ponycdmin      Defer cycle detection until 2^N actors have blocked.\n"
     "                   Defaults to 2^4.\n"
     "  --ponycdmax      Always cycle detect when 2^N actors have blocked.\n"

diff --git a/src/libponyrt/asio/event.c b/src/libponyrt/asio/event.c
@@ -2,6 +2,7 @@
 #include "asio.h"
 #include "../actor/actor.h"
 #include "../mem/pool.h"
+#include "../sched/scheduler.h"
 #include "ponyassert.h"
 #include <string.h>
 
@@ -122,4 +123,7 @@ PONY_API void pony_asio_event_send(asio_event_t* ev, uint32_t flags,
   // sender they aren't covered by backpressure. We pass false for an early
   // bailout in the backpressure code.
   pony_sendv(pony_ctx(), ev->owner, &m->msg, &m->msg, false);
+
+  // maybe wake up a scheduler thread if they've all fallen asleep
+  ponyint_sched_maybe_wakeup_if_all_asleep(-1);
 }
diff --git a/src/libponyrt/platform/threads.c b/src/libponyrt/platform/threads.c
@@ -18,17 +18,6 @@
 typedef cpuset_t cpu_set_t;
 #endif
 
-#if defined(USE_SCHEDULER_SCALING_PTHREADS)
-static pthread_mutex_t sleep_mut;
-
-static pthread_once_t sleep_mut_once = PTHREAD_ONCE_INIT;
-
-void sleep_mut_init()
-{
-    pthread_mutex_init(&sleep_mut, NULL);
-}
-#endif
-
 #if defined(PLATFORM_IS_LINUX)
 
 #include <dlfcn.h>
@@ -213,10 +202,6 @@ bool ponyint_thread_create(pony_thread_id_t* thread, thread_fn start,
     return false;
 #endif
 
-#if !defined(PLATFORM_IS_WINDOWS) && defined(USE_SCHEDULER_SCALING_PTHREADS)
-  pthread_once(&sleep_mut_once, sleep_mut_init);
-#endif
-
   return true;
 }
 
@@ -249,22 +234,21 @@ pony_thread_id_t ponyint_thread_self()
 #endif
 }
 
+#if defined(USE_SCHEDULER_SCALING_PTHREADS)
+void ponyint_thread_suspend(pony_signal_event_t signal, pthread_mutex_t* mut)
+#else
 void ponyint_thread_suspend(pony_signal_event_t signal)
+#endif
 {
 #ifdef PLATFORM_IS_WINDOWS
   WaitForSingleObject(signal, INFINITE);
 #elif defined(USE_SCHEDULER_SCALING_PTHREADS)
   int ret;
-  // lock mutex
-  ret = pthread_mutex_lock(&sleep_mut);
 
   // wait for condition variable (will sleep and release mutex)
-  ret = pthread_cond_wait(signal, &sleep_mut);
+  ret = pthread_cond_wait(signal, mut);
   // TODO: What to do if `ret` is an unrecoverable error?
   (void) ret;
-
-  // unlock mutex
-  ret = pthread_mutex_unlock(&sleep_mut);
 #else
   int sig;
   sigset_t sigmask;