Skip to content

Commit

Permalink
[release/8.0-staging] [mono] Fix deadlock in static constructor initi…
Browse files Browse the repository at this point in the history
…alization (#93943)


Backport of #93875 to release/8.0-staging

* [mono] Fix deadlock in static constructor initialization

If two threads (A and B) need to call the static constructors for 3
classes X, Y, Z in this order:

Thread A: X, Z, Y
Thread B: Y, Z, X

where the cctor for X in thread A invokes the cctor for Z and for Y, and the
cctor for Y in thread B invokes the cctor for Z and X, then we can get
into a situation where A and B both start the cctors for X and Y (so
they will be in the type_initialization_hash for those two classes)
and then both will try to init Z.  In that case it could be that A
will be responsible for initializing Z and B will block.  Then A could
finish initializing Z but B may not have woken up yet (and so it will
be in blocked_thread_hash waiting for Z).  At that point A (who is at
this point already need to init Y) may think that it can wait for B to
finish initializing Y.  That is we get to
`mono_coop_cond_timedwait (&lock->cond, &lock->mutex, timeout_ms)`
with "lock" being the lock for `Y`.  But in fact thread B will not be
able to complete initializing Y because it will attempt to init X next
- but meanwhile we did not indicate that thread A is blocked.
As a result in thread A the timed wait will eventually timeout.  And
in this case we need to go back to the top and now correctly detect
that A is waiting for Y and B is waiting for X.  (At that point there
is a cctor deadlock and ECMA rules allow one of the threads to return
without calling the cctor)

The old code here used to do an infinite wait:
  while (!lock->done)
    mono_coop_cond_wait (&lock->cond, &lock->mutex)

This cannot succeed because "lock" (in thread A it's the lock for Y)
will not be signaled since B (who is supposed to init Y) will instead
block on the cctor for X.

Fixes #93778

* Add test case

* remove prototyping log messages

* disable mt test on wasm

* better issues.target exclusion

* code review feedback


Co-authored-by: Aleksey Kliger <[email protected]>
  • Loading branch information
github-actions[bot] and lambdageek authored Nov 14, 2023
1 parent 4811680 commit dc944d5
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 2 deletions.
51 changes: 49 additions & 2 deletions src/mono/mono/metadata/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error)
* on this cond var.
*/

retry_top:
mono_type_initialization_lock ();
/* double check... */
if (vtable->initialized) {
Expand Down Expand Up @@ -507,6 +508,12 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error)
blocked = GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (lock->initializing_tid));
while ((pending_lock = (TypeInitializationLock*) g_hash_table_lookup (blocked_thread_hash, blocked))) {
if (mono_native_thread_id_equals (pending_lock->initializing_tid, tid)) {
if (mono_trace_is_traced (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE)) {
char* type_name = mono_type_full_name (m_class_get_byval_arg (klass));
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE, "Detected deadlock for class .cctor for %s from '%s'", type_name, m_class_get_image (klass)->name);
g_free (type_name);
}

if (!pending_lock->done) {
mono_type_initialization_unlock ();
goto return_true;
Expand Down Expand Up @@ -605,9 +612,49 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error)
} else {
/* this just blocks until the initializing thread is done */
mono_type_init_lock (lock);
while (!lock->done)
mono_coop_cond_wait (&lock->cond, &lock->mutex);
if (!lock->done) {
int timeout_ms = 500;
int wait_result = mono_coop_cond_timedwait (&lock->cond, &lock->mutex, timeout_ms);
if (wait_result == -1) {
/* timed out - go around again from the beginning. If we got here
* from the "is_blocked = FALSE" case, above (another thread was
* blocked on the current thread, but on a lock that was already
* done but it didn't get to wake up yet), then it might still be
* the case that the current thread cannot proceed even if the other
* thread got to wake up - there might be a new deadlock. We need
* to re-evaluate.
*
* This can happen if two threads A and B need to call the cctors
* for classes X and Y but in opposite orders, and also call a cctor
* for a third class Z. (That is thread A wants to init: X, Z, Y;
* thread B wants to init: Y, Z, X.) In that case, if B is waiting
* for A to finish initializing Z, and A (the current thread )
* already finished Z and wants to init Y. In A, control will come
* here with "lock" being Y's lock. But we will time out because B
* will see that A is responsible for initializing X and will also
* block. So A is waiting for B to finish Y and B is waiting for A
* to finish X. So the fact that A allowed B to wait for Z to
* finish didn't actually let us make progress. Thread A must go
* around to the top once more and try to init Y - and detect that
* there is now a deadlock between X and Y.
*/
mono_type_init_unlock (lock);
// clean up blocked thread hash and lock refcount.
mono_type_initialization_lock ();
g_hash_table_remove (blocked_thread_hash, GUINT_TO_POINTER (tid));
gboolean deleted = unref_type_lock (lock);
if (deleted)
g_hash_table_remove (type_initialization_hash, vtable);
mono_type_initialization_unlock ();
goto retry_top;
} else if (wait_result == 0) {
/* Success: we were signaled that the other thread is done. Proceed */
} else {
g_assert_not_reached ();
}
}
mono_type_init_unlock (lock);
g_assert (lock->done);
}

/* Do cleanup and setting vtable->initialized inside the global lock again */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using System.Threading;

using Xunit;

// Regression test for https://github.com/dotnet/runtime/issues/93778
namespace CircularCctorTwoThreadsThreeCC;

[Flags]
public enum SlotConstants : int
{
ZInited = 1,
YInitedFromX = 2,
XInitedFromY = 4,

Thread1 = 1 << 16,
Thread2 = 2 << 16,
}

/// X and Y both try to use each other, and also both use Z.
/// We expect to see exactly one thread initialize Z and
/// either X inited from Y or Y inited from X.
public class X
{
public static X Singleton = new();
private X() {
Z.Singleton.Ping();
Y.Singleton?.Pong();
}

public void Pong() => Coordinator.Note(SlotConstants.XInitedFromY);
}

public class Y
{
public static Y Singleton = new();
private Y() {
Z.Singleton.Ping();
X.Singleton?.Pong();
}

public void Pong() => Coordinator.Note(SlotConstants.YInitedFromX);
}

public class Z
{
public static Z Singleton = new();

private Z() {
Coordinator.Note(SlotConstants.ZInited);
}

public void Ping() { }

}

public class Coordinator
{
[ThreadStatic]
private static SlotConstants t_threadTag;

private static int s_NextNote;
private static readonly SlotConstants[] Notes = new SlotConstants[12];

private static SlotConstants DecorateWithThread(SlotConstants c)
{
return c | t_threadTag;
}

public static void Note(SlotConstants s) {
int idx = Interlocked.Increment(ref s_NextNote);
idx--;
Notes[idx] = DecorateWithThread (s);
}

public static Coordinator CreateThread(bool xThenY, SlotConstants threadTag)
{
return new Coordinator(xThenY, threadTag);
}

public readonly Thread Thread;
private static readonly Barrier s_barrier = new (3);

private Coordinator(bool xThenY, SlotConstants threadTag)
{
var t = new Thread(() => {
t_threadTag = threadTag;
// Log("started");
NextPhase();
// Log("racing");
DoConstructions(xThenY);
NextPhase();
// Log("done");
});
Thread = t;
t.Start();
}

public static void NextPhase() { s_barrier.SignalAndWait(); }

[MethodImpl(MethodImplOptions.NoInlining)]
public static void DoConstructions(bool xThenY)
{
if (xThenY) {
XCreate();
} else {
YCreate();
}
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void XCreate()
{
var _ = X.Singleton;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void YCreate()
{
var _ = Y.Singleton;
}

public static void Log(string msg)
{
Console.WriteLine ($"{Thread.CurrentThread.ManagedThreadId}: {msg}");
}

[Fact]
public static void RunTestCase()
{
var c1 = CreateThread(xThenY: true, threadTag: SlotConstants.Thread1);
var c2 = CreateThread(xThenY: false, threadTag: SlotConstants.Thread2);
// created all threads
NextPhase();
// racing
NextPhase();
// done

// one second should be plenty for all these threads, but it's arbitrary
int threadJoinTimeoutMS = 1000;
var j1 = c1.Thread.Join(threadJoinTimeoutMS);
var j2 = c2.Thread.Join(threadJoinTimeoutMS);
Assert.True(j1);
Assert.True(j2);
// all joined

// exactly one thread inited Z
Assert.Equal(1, CountNotes(SlotConstants.ZInited));
// either X was inited or Y, not both.
Assert.Equal(1, Count2Notes(SlotConstants.XInitedFromY, SlotConstants.YInitedFromX));
}

private static int CountNotes(SlotConstants mask)
{
int found = 0;
foreach (var note in Notes) {
if ((note & mask) != (SlotConstants)0) {
found++;
}
}
return found;
}

private static int Count2Notes(SlotConstants mask1, SlotConstants mask2)
{
int found = 0;
foreach (var note in Notes) {
if ((note & mask1) != (SlotConstants)0) {
found++;
}
if ((note & mask2) != (SlotConstants)0) {
found++;
}
}
return found;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<CLRTestPriority>0</CLRTestPriority>
</PropertyGroup>
<ItemGroup>
<Compile Include="CircularCctorTwoThreadsThreeCC.cs" />
</ItemGroup>
</Project>
3 changes: 3 additions & 0 deletions src/tests/issues.targets
Original file line number Diff line number Diff line change
Expand Up @@ -3259,6 +3259,9 @@
<ExcludeList Include = "$(XunitTestBinBase)/tracing/eventcounter/pollingcounter/**">
<Issue>System.Threading.Thread.UnsafeStart not supported</Issue>
</ExcludeList>
<ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC/**">
<Issue>System.Threading.Thread.ThrowIfNoThreadStart: PlatformNotSupportedException</Issue>
</ExcludeList>
<ExcludeList Include = "$(XunitTestBinBase)/tracing/eventcounter/gh53564/**">
<Issue>Could not load legacy Microsoft.Diagnostics.Tools.RuntimeClient</Issue>
</ExcludeList>
Expand Down

0 comments on commit dc944d5

Please sign in to comment.