From bac322769267f6055ee84a5b5569be22702bd8a3 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Fri, 2 Jun 2023 13:42:11 -0700 Subject: [PATCH 01/24] introduced TlsRootNode --- .../src/Internal/Runtime/ThreadStatics.cs | 11 +++++ .../DependencyAnalysis/NodeFactory.cs | 3 ++ .../DependencyAnalysis/TlsRootNode.cs | 45 +++++++++++++++++++ .../TypeThreadStaticIndexNode.cs | 8 ++++ .../ILCompiler.Compiler.csproj | 1 + 5 files changed, 68 insertions(+) create mode 100644 src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs index 9734e2a0a2632f..059340bb55f410 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs @@ -26,6 +26,17 @@ internal static unsafe object GetThreadStaticBaseForType(TypeManagerSlot* pModul if (typeTlsIndex >= 0) return GetUninlinedThreadStaticBaseForType(pModuleData, typeTlsIndex); + // the inlined storage should be per module. Module must have a symbol. + // RhGetInlinedThreadStaticStorage should take the module and figure where the storage is. + + // inlined case - no index (assume -1) + // gets reloc'd offset. + + // is the same as current INLINE_TLS, but gets a reloc'd offset, which module data has. + + // 1) TypeManger or module data must define a symbol that loader understands + // 2) has a reloc'd offset to it. + ref object? 
threadStorage = ref RuntimeImports.RhGetInlinedThreadStaticStorage(); if (threadStorage != null) return threadStorage; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index 415f887b0a3564..56bc517f7f0659 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1288,6 +1288,8 @@ public string GetSymbolAlternateName(ISymbolNode node) protected internal TypeManagerIndirectionNode TypeManagerIndirection = new TypeManagerIndirectionNode(); + protected internal TlsRootNode TlsRoot = new TlsRootNode(); + public virtual void AttachToDependencyGraph(DependencyAnalyzerBase graph) { ReadyToRunHeader = new ReadyToRunHeaderNode(); @@ -1299,6 +1301,7 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase graph.AddRoot(ThreadStaticsRegion, "ThreadStaticsRegion is always generated"); graph.AddRoot(EagerCctorTable, "EagerCctorTable is always generated"); graph.AddRoot(TypeManagerIndirection, "TypeManagerIndirection is always generated"); + graph.AddRoot(TlsRoot, "TlsRoot is always generated"); graph.AddRoot(FrozenSegmentRegion, "FrozenSegmentRegion is always generated"); graph.AddRoot(InterfaceDispatchCellSection, "Interface dispatch cell section is always generated"); graph.AddRoot(ModuleInitializerList, "Module initializer list is always generated"); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs new file mode 100644 index 00000000000000..92f83d655beaff --- /dev/null +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using Internal.Text; + +namespace ILCompiler.DependencyAnalysis +{ + public class TlsRootNode : ObjectNode, ISymbolDefinitionNode + { + public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) + { + // tls_InlinedThreadStatics + sb.Append(nameMangler.CompilationUnitPrefix).Append("__tls_root"); + } + public int Offset => 0; + public override bool IsShareable => false; + + protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); + + public override ObjectNodeSection GetSection(NodeFactory factory) => ObjectNodeSection.TLSSection; + + public override bool StaticDependenciesAreComputed => true; + + public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) + { + ObjectDataBuilder objData = new ObjectDataBuilder(factory, relocsOnly); + objData.AddSymbol(this); + objData.RequireInitialPointerAlignment(); + + // root + objData.EmitZeroPointer(); + + // next + objData.EmitZeroPointer(); + + // type manager + objData.EmitPointerReloc(factory.TypeManagerIndirection); + + return objData.ToObjectData(); + } + + // TODO: VS where this should come from? + public override int ClassCode => -985742028; + } +} diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs index 772970bf145cae..4d020123e9baaa 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs @@ -83,9 +83,17 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo } } + // needed to construct storage. + // also need to put in the root, so that we could drop SingleTypeManager thing. 
objData.EmitPointerReloc(factory.TypeManagerIndirection); + + // tls storage ID for uninlined types. used to get the type from the type manager + // and slot from the array objData.EmitNaturalInt(typeTlsIndex); + // need to emit TLS reloc for the factory.InlinedTlsRoot + // we will pass that to the helper (will use SingletypeManager for now, but later arrange typeman in the TLS) + return objData.ToObjectData(); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj b/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj index ff83282e7f4bb4..fb5b51472d6bfc 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj @@ -526,6 +526,7 @@ + From 1ff41b9ea0c0078098e895b1906effa233b867c6 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sat, 3 Jun 2023 21:02:38 -0700 Subject: [PATCH 02/24] remove RhGetInlinedThreadStaticStorage --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 5 ---- .../src/Internal/Runtime/ThreadStatics.cs | 28 +++++++------------ .../src/System/Runtime/RuntimeImports.cs | 4 --- 3 files changed, 10 insertions(+), 27 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index c65d9577095665..53b7a2b6a8a6e7 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -442,11 +442,6 @@ EXTERN_C ThreadBuffer* RhpGetThread() return &tls_CurrentThread; } -COOP_PINVOKE_HELPER(Object**, RhGetInlinedThreadStaticStorage, ()) -{ - return &tls_InlinedThreadStatics.m_threadStaticsBase; -} - #endif // !DACCESS_COMPILE #ifdef _WIN32 diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs index 059340bb55f410..594ea389c73fe2 100644 --- 
a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs @@ -17,31 +17,19 @@ namespace Internal.Runtime /// internal static class ThreadStatics { + [ThreadStatic] + private static object t_inlinedThreadStaticBase; + /// /// This method is called from a ReadyToRun helper to get base address of thread /// static storage for the given type. /// internal static unsafe object GetThreadStaticBaseForType(TypeManagerSlot* pModuleData, int typeTlsIndex) { - if (typeTlsIndex >= 0) - return GetUninlinedThreadStaticBaseForType(pModuleData, typeTlsIndex); - - // the inlined storage should be per module. Module must have a symbol. - // RhGetInlinedThreadStaticStorage should take the module and figure where the storage is. - - // inlined case - no index (assume -1) - // gets reloc'd offset. - - // is the same as current INLINE_TLS, but gets a reloc'd offset, which module data has. + if (typeTlsIndex < 0) + return t_inlinedThreadStaticBase; - // 1) TypeManger or module data must define a symbol that loader understands - // 2) has a reloc'd offset to it. - - ref object? threadStorage = ref RuntimeImports.RhGetInlinedThreadStaticStorage(); - if (threadStorage != null) - return threadStorage; - - return GetInlinedThreadStaticBaseSlow(ref threadStorage); + return GetUninlinedThreadStaticBaseForType(pModuleData, typeTlsIndex); } [RuntimeExport("RhpGetInlinedThreadStaticBaseSlow")] @@ -49,6 +37,8 @@ internal static unsafe object GetInlinedThreadStaticBaseSlow(ref object? 
threadS { Debug.Assert(threadStorage == null); // Allocate an object that will represent a memory block for all thread static fields + + // TODO: VS fetch the type manager from the threadStorage location TypeManagerHandle typeManager = RuntimeImports.RhGetSingleTypeManager(); object threadStaticBase = AllocateThreadStaticStorageForType(typeManager, 0); @@ -57,6 +47,8 @@ internal static unsafe object GetInlinedThreadStaticBaseSlow(ref object? threadS // assign the storage block to the storage variable and return threadStorage = threadStaticBase; + t_inlinedThreadStaticBase = threadStaticBase; + return threadStaticBase; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs index 68b50d5bb966ee..9ce06ca443fd8f 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs @@ -560,10 +560,6 @@ internal static IntPtr RhGetModuleSection(TypeManagerHandle module, ReadyToRunSe [RuntimeImport(RuntimeLibrary, "RhGetThreadStaticStorage")] internal static extern ref object[][] RhGetThreadStaticStorage(); - [MethodImplAttribute(MethodImplOptions.InternalCall)] - [RuntimeImport(RuntimeLibrary, "RhGetInlinedThreadStaticStorage")] - internal static extern ref object? RhGetInlinedThreadStaticStorage(); - [MethodImplAttribute(MethodImplOptions.InternalCall)] [RuntimeImport(RuntimeLibrary, "RhRegisterInlinedThreadStaticRoot")] internal static extern void RhRegisterInlinedThreadStaticRoot(ref object? 
root); From e1d070d64b6cac3b9f0ccef9463c2a165ac71896 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sat, 3 Jun 2023 22:40:28 -0700 Subject: [PATCH 03/24] get rid of c++ tls_InlinedThreadStatics --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 5 ----- src/coreclr/nativeaot/Runtime/threadstore.inl | 6 ------ .../Compiler/DependencyAnalysis/TlsRootNode.cs | 2 +- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 53b7a2b6a8a6e7..67a6949fd7fb06 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -430,11 +430,6 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); #ifndef _MSC_VER __thread ThreadBuffer tls_CurrentThread; - -// the root of inlined threadstatics storage -// there is only one now, -// eventually this will be emitted by ILC and we may have more than one such variable -__thread InlinedThreadStaticRoot tls_InlinedThreadStatics; #endif EXTERN_C ThreadBuffer* RhpGetThread() diff --git a/src/coreclr/nativeaot/Runtime/threadstore.inl b/src/coreclr/nativeaot/Runtime/threadstore.inl index 6fe750f4b01c01..29495046a98272 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.inl +++ b/src/coreclr/nativeaot/Runtime/threadstore.inl @@ -4,14 +4,8 @@ #ifdef _MSC_VER // a workaround to prevent tls_CurrentThread from becoming dynamically checked/initialized. 
EXTERN_C __declspec(selectany) __declspec(thread) ThreadBuffer tls_CurrentThread; - -// the root of inlined threadstatics storage -// there is only one now, -// eventually this will be emitted by ILC and we may have more than one such variable -EXTERN_C __declspec(selectany) __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; #else EXTERN_C __thread ThreadBuffer tls_CurrentThread; -EXTERN_C __thread InlinedThreadStaticRoot tls_InlinedThreadStatics; #endif // static diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs index 92f83d655beaff..240edab03b2493 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -10,7 +10,7 @@ public class TlsRootNode : ObjectNode, ISymbolDefinitionNode public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) { // tls_InlinedThreadStatics - sb.Append(nameMangler.CompilationUnitPrefix).Append("__tls_root"); + sb.Append(nameMangler.CompilationUnitPrefix).Append("tls_InlinedThreadStatics"); } public int Offset => 0; public override bool IsShareable => false; From f5812e7d4ffe1da4b3e4f0138dc3b216b6563edb Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 4 Jun 2023 16:49:07 -0700 Subject: [PATCH 04/24] remove GetSingleTypeManager --- .../nativeaot/Runtime/RuntimeInstance.cpp | 19 ------------------- .../nativeaot/Runtime/RuntimeInstance.h | 1 - .../src/Internal/Runtime/ThreadStatics.cs | 14 ++++++-------- .../src/System/Runtime/RuntimeImports.cs | 4 ---- .../DependencyAnalysis/TlsRootNode.cs | 3 --- .../TypeThreadStaticIndexNode.cs | 4 ---- 6 files changed, 6 insertions(+), 39 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp 
b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp index 2d1a6971b10fb8..e30e8952389900 100644 --- a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp +++ b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp @@ -267,25 +267,6 @@ RuntimeInstance::TypeManagerList& RuntimeInstance::GetTypeManagerList() return m_TypeManagerList; } -TypeManager* RuntimeInstance::GetSingleTypeManager() -{ - auto head = m_TypeManagerList.GetHead(); - if (head != NULL && head->m_pNext == NULL) - { - return head->m_pTypeManager; - } - - return NULL; -} - -COOP_PINVOKE_HELPER(TypeManagerHandle, RhGetSingleTypeManager, ()) -{ - TypeManager* typeManager = GetRuntimeInstance()->GetSingleTypeManager(); - ASSERT(typeManager != NULL); - - return TypeManagerHandle::Create(typeManager); -} - // static bool RuntimeInstance::Initialize(HANDLE hPalInstance) { diff --git a/src/coreclr/nativeaot/Runtime/RuntimeInstance.h b/src/coreclr/nativeaot/Runtime/RuntimeInstance.h index 06db50839eebac..2de7c220f09128 100644 --- a/src/coreclr/nativeaot/Runtime/RuntimeInstance.h +++ b/src/coreclr/nativeaot/Runtime/RuntimeInstance.h @@ -99,7 +99,6 @@ class RuntimeInstance bool RegisterTypeManager(TypeManager * pTypeManager); TypeManagerList& GetTypeManagerList(); - TypeManager* GetSingleTypeManager(); OsModuleList* GetOsModuleList(); bool RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange); diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs index 594ea389c73fe2..1db0d4c4b8a638 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs @@ -37,9 +37,7 @@ internal static unsafe object GetInlinedThreadStaticBaseSlow(ref object? 
threadS { Debug.Assert(threadStorage == null); // Allocate an object that will represent a memory block for all thread static fields - - // TODO: VS fetch the type manager from the threadStorage location - TypeManagerHandle typeManager = RuntimeImports.RhGetSingleTypeManager(); + TypeManagerHandle typeManager = (new object()).GetMethodTable()->TypeManager; object threadStaticBase = AllocateThreadStaticStorageForType(typeManager, 0); // register the storage location with the thread for GC reporting. @@ -58,13 +56,13 @@ internal static unsafe object GetUninlinedThreadStaticBaseForType(TypeManagerSlo int moduleIndex = pModuleData->ModuleIndex; Debug.Assert(moduleIndex >= 0); - object[][] threadStorage = RuntimeImports.RhGetThreadStaticStorage(); - if (threadStorage != null && threadStorage.Length > moduleIndex) + object[][] perThreadStorage = RuntimeImports.RhGetThreadStaticStorage(); + if (perThreadStorage != null && perThreadStorage.Length > moduleIndex) { - object[] moduleStorage = threadStorage[moduleIndex]; - if (moduleStorage != null && moduleStorage.Length > typeTlsIndex) + object[] perModuleStorage = perThreadStorage[moduleIndex]; + if (perModuleStorage != null && perModuleStorage.Length > typeTlsIndex) { - object threadStaticBase = moduleStorage[typeTlsIndex]; + object threadStaticBase = perModuleStorage[typeTlsIndex]; if (threadStaticBase != null) { return threadStaticBase; diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs index 9ce06ca443fd8f..c6ac7d60c9990b 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs @@ -588,10 +588,6 @@ internal static IntPtr RhGetModuleSection(TypeManagerHandle module, ReadyToRunSe [RuntimeImport(RuntimeLibrary, "RhGetTargetOfUnboxingAndInstantiatingStub")] public static 
extern IntPtr RhGetTargetOfUnboxingAndInstantiatingStub(IntPtr pCode); - [MethodImplAttribute(MethodImplOptions.InternalCall)] - [RuntimeImport(RuntimeLibrary, "RhGetSingleTypeManager")] - public static extern TypeManagerHandle RhGetSingleTypeManager(); - // // EH helpers // diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs index 240edab03b2493..288a541c444e7f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -33,9 +33,6 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) // next objData.EmitZeroPointer(); - // type manager - objData.EmitPointerReloc(factory.TypeManagerIndirection); - return objData.ToObjectData(); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs index 4d020123e9baaa..9a5d44d3580cc8 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs @@ -84,16 +84,12 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo } // needed to construct storage. - // also need to put in the root, so that we could drop SingleTypeManager thing. objData.EmitPointerReloc(factory.TypeManagerIndirection); // tls storage ID for uninlined types. 
used to get the type from the type manager // and slot from the array objData.EmitNaturalInt(typeTlsIndex); - // need to emit TLS reloc for the factory.InlinedTlsRoot - // we will pass that to the helper (will use SingletypeManager for now, but later arrange typeman in the TLS) - return objData.ToObjectData(); } From aeee60961ab5fc0c093b5b0cae3cf650a01734d8 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 4 Jun 2023 19:31:06 -0700 Subject: [PATCH 05/24] use .tdata on unix --- .../Compiler/DependencyAnalysis/ObjectNodeSection.cs | 9 ++++++++- .../Compiler/DependencyAnalysis/TlsRootNode.cs | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs index 6f0c0bec5aba19..0cd1a4bd707f10 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs @@ -46,7 +46,14 @@ public bool IsStandardSection public static readonly ObjectNodeSection ReadOnlyDataSection = new ObjectNodeSection("rdata", SectionType.ReadOnly); public static readonly ObjectNodeSection FoldableReadOnlyDataSection = new ObjectNodeSection("rdata", SectionType.ReadOnly); public static readonly ObjectNodeSection TextSection = new ObjectNodeSection("text", SectionType.Executable); - public static readonly ObjectNodeSection TLSSection = new ObjectNodeSection("TLS", SectionType.Writeable); + + public static readonly ObjectNodeSection TLSSectionUnix = new ObjectNodeSection(".tdata", SectionType.Writeable); + + // UNDONE: section is irrelevant on Windows (typically .rdata) as long as the TLS template image is contiguous and + // referenced via _tls_used, _tls_start, _tls_end. + // not sure why the following "works". 
+ public static readonly ObjectNodeSection TLSSectionWindows = new ObjectNodeSection("FIXME", SectionType.Writeable); + public static readonly ObjectNodeSection BssSection = new ObjectNodeSection("bss", SectionType.Uninitialized); public static readonly ObjectNodeSection HydrationTargetSection = new ObjectNodeSection("hydrated", SectionType.Uninitialized); public static readonly ObjectNodeSection ManagedCodeWindowsContentSection = new ObjectNodeSection(".managedcode$I", SectionType.Executable); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs index 288a541c444e7f..6c1e473ed517a0 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -13,11 +13,12 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) sb.Append(nameMangler.CompilationUnitPrefix).Append("tls_InlinedThreadStatics"); } public int Offset => 0; - public override bool IsShareable => false; + public override bool IsShareable => true; protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); - public override ObjectNodeSection GetSection(NodeFactory factory) => ObjectNodeSection.TLSSection; + public override ObjectNodeSection GetSection(NodeFactory factory) => + factory.Target.IsWindows ? 
ObjectNodeSection.TLSSectionWindows : ObjectNodeSection.TLSSectionUnix; public override bool StaticDependenciesAreComputed => true; From 447969ce1b9d7290f72cbd1e619eb923875ae204 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 5 Jun 2023 13:27:38 -0700 Subject: [PATCH 06/24] do not switch OSX just yet --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 7 +++++++ .../Compiler/DependencyAnalysis/NodeFactory.cs | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 67a6949fd7fb06..fac1c739f6daff 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -430,6 +430,13 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); #ifndef _MSC_VER __thread ThreadBuffer tls_CurrentThread; + +#if defined(__APPLE__) +// the root of inlined threadstatics storage +// eventually this will be emitted by ILC and we may have more than one such variable +__thread InlinedThreadStaticRoot tls_InlinedThreadStatics; +#endif + #endif EXTERN_C ThreadBuffer* RhpGetThread() diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index 56bc517f7f0659..550909912cfcfb 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1301,7 +1301,12 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase graph.AddRoot(ThreadStaticsRegion, "ThreadStaticsRegion is always generated"); graph.AddRoot(EagerCctorTable, "EagerCctorTable is always generated"); graph.AddRoot(TypeManagerIndirection, "TypeManagerIndirection is always generated"); - graph.AddRoot(TlsRoot, "TlsRoot is always generated"); + + if (!_target.IsOSXLike) + { + 
graph.AddRoot(TlsRoot, "TlsRoot is always generated"); + } + graph.AddRoot(FrozenSegmentRegion, "FrozenSegmentRegion is always generated"); graph.AddRoot(InterfaceDispatchCellSection, "Interface dispatch cell section is always generated"); graph.AddRoot(ModuleInitializerList, "Module initializer list is always generated"); From 860ab1a85f02f75bdf95bc0ddcd063c2d007ba0d Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 7 Jun 2023 17:38:24 -0700 Subject: [PATCH 07/24] bring back tls_InlinedThreadStatics on Windows temporarily --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 6 ++++++ .../Compiler/DependencyAnalysis/NodeFactory.cs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index fac1c739f6daff..5224bdfff22374 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -437,6 +437,12 @@ __thread ThreadBuffer tls_CurrentThread; __thread InlinedThreadStaticRoot tls_InlinedThreadStatics; #endif +#else +// the root of inlined threadstatics storage +// there is only one now, +// eventually this will be emitted by ILC and we may have more than one such variable +EXTERN_C __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; +__declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; #endif EXTERN_C ThreadBuffer* RhpGetThread() diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index 550909912cfcfb..8ec1185069aa76 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1302,7 +1302,7 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase 
graph.AddRoot(EagerCctorTable, "EagerCctorTable is always generated"); graph.AddRoot(TypeManagerIndirection, "TypeManagerIndirection is always generated"); - if (!_target.IsOSXLike) + if (!_target.IsOSXLike && !_target.IsWindows) { graph.AddRoot(TlsRoot, "TlsRoot is always generated"); } From 4e3a8e545302ee15220126ef5a6f550385d56b4d Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 8 Jun 2023 11:12:23 -0700 Subject: [PATCH 08/24] emit inline access on windows --- .../Target_X64/X64Emitter.cs | 14 ++++ .../DependencyAnalysis/HelperEntrypoint.cs | 1 + .../DependencyAnalysis/NodeFactory.cs | 3 +- .../Target_X64/X64ReadyToRunHelperNode.cs | 73 ++++++++++++++++--- 4 files changed, 81 insertions(+), 10 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X64/X64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X64/X64Emitter.cs index b88bb7f1002c57..321da23b856647 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X64/X64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X64/X64Emitter.cs @@ -117,6 +117,20 @@ public void EmitJE(ISymbolNode symbol) } } + public void EmitJNE(ISymbolNode symbol) + { + if (symbol.RepresentsIndirectionCell) + { + throw new NotImplementedException(); + } + else + { + Builder.EmitByte(0x0f); + Builder.EmitByte(0x85); + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_REL32); + } + } + public void EmitINT3() { Builder.EmitByte(0xCC); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/HelperEntrypoint.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/HelperEntrypoint.cs index c739d0d7f0cccd..7a21199c8770e9 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/HelperEntrypoint.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/HelperEntrypoint.cs @@ -9,5 +9,6 @@ public enum HelperEntrypoint 
EnsureClassConstructorRunAndReturnNonGCStaticBase, EnsureClassConstructorRunAndReturnThreadStaticBase, GetThreadStaticBaseForType, + GetInlinedThreadStaticBaseSlow, } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index 8ec1185069aa76..c0f2150e020ce4 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1041,7 +1041,8 @@ public IMethodNode ShadowConcreteMethod(MethodDesc method, bool isUnboxingStub = new string[] { "System.Runtime.CompilerServices", "ClassConstructorRunner", "CheckStaticClassConstructionReturnGCStaticBase" }, new string[] { "System.Runtime.CompilerServices", "ClassConstructorRunner", "CheckStaticClassConstructionReturnNonGCStaticBase" }, new string[] { "System.Runtime.CompilerServices", "ClassConstructorRunner", "CheckStaticClassConstructionReturnThreadStaticBase" }, - new string[] { "Internal.Runtime", "ThreadStatics", "GetThreadStaticBaseForType" } + new string[] { "Internal.Runtime", "ThreadStatics", "GetThreadStaticBaseForType" }, + new string[] { "Internal.Runtime", "ThreadStatics", "GetInlinedThreadStaticBaseSlow" }, }; private ISymbolNode[] _helperEntrypointSymbols; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs index 15c0a98e3dfeba..c5868ccb215de7 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -74,25 +74,25 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter encoder, bo 
ISortableSymbolNode index = factory.TypeThreadStaticIndex(target); if (index is TypeThreadStaticIndexNode ti && ti.Type == null) { - ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase"); - if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) { - encoder.EmitJMP(helper); + EmitInlineTLSAccess(factory, ref encoder); } else { + // First arg: unused address of the TypeManager + // encoder.EmitMOV(encoder.TargetRegister.Arg0, 0); + + // Second arg: -1 (index of inlined storage) + encoder.EmitMOV(encoder.TargetRegister.Arg1, -1); + encoder.EmitLEAQ(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target), -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); AddrMode initialized = new AddrMode(encoder.TargetRegister.Arg2, null, 0, 0, AddrModeSize.Int64); encoder.EmitCMP(ref initialized, 0); - encoder.EmitJE(helper); + encoder.EmitJNE(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); - // First arg: unused address of the TypeManager - encoder.EmitMOV(encoder.TargetRegister.Arg0, 0); - // Second arg: -1 (index of inlined storage) - encoder.EmitMOV(encoder.TargetRegister.Arg1, -1); - encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + EmitInlineTLSAccess(factory, ref encoder); } } else @@ -225,5 +225,60 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter encoder, bo throw new NotImplementedException(); } } + + private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter encoder) + { + ISymbolNode getInlinedThreadStaticBaseSlow = factory.HelperEntrypoint(HelperEntrypoint.GetInlinedThreadStaticBaseSlow); + + if (factory.Target.IsWindows) + { + // TODO: VS this should be "factory.TlsRoot" + ISymbolNode tlsRoot = factory.ExternSymbol("tls_InlinedThreadStatics"); + // TODO: VS can we know that we have a singlefile exe case? 
+ bool singleFileExe = false; + if (singleFileExe) + { + // mov rax,qword ptr gs:[58h] + encoder.Builder.EmitBytes(new byte[] { 0x65, 0x48, 0x8B, 0x04, 0x25, 0x58, 0x00, 0x00, 0x00 }); + + // mov ecx,[tls_InlinedThreadStatics] + encoder.Builder.EmitBytes(new byte[] { 0xB9 }); + encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_SECREL); + + // add rcx,qword ptr [rax] + encoder.Builder.EmitBytes(new byte[] { 0x48, 0x03, 0x08 }); + } + else + { + // mov ecx,dword ptr [_tls_index] + encoder.Builder.EmitBytes(new byte[] { 0x8B, 0x0D }); + encoder.Builder.EmitReloc(factory.ExternSymbol("_tls_index"), RelocType.IMAGE_REL_BASED_REL32); + + // mov rax,qword ptr gs:[58h] + encoder.Builder.EmitBytes(new byte[] { 0x65, 0x48, 0x8B, 0x04, 0x25, 0x58, 0x00, 0x00, 0x00 }); + + // mov rax,qword ptr [rax+rcx*8] + encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8B, 0x04, 0xC8 }); + + // mov ecx,[tls_InlinedThreadStatics] + encoder.Builder.EmitBytes(new byte[] { 0xB9 }); + encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_SECREL); + + // add rcx,rax + encoder.Builder.EmitBytes(new byte[] { 0x48, 0x01, 0xC1 }); + } + + // mov rax, qword ptr[rcx] + encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8b, 0x01 }); + encoder.EmitCompareToZero(Register.RAX); + encoder.EmitJE(getInlinedThreadStaticBaseSlow); + encoder.EmitRET(); + } + else + { + ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase"); + encoder.EmitJMP(helper); + } + } } } From d96709cd61800368487fb3cbd34e0daa8a40fbd6 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 8 Jun 2023 13:43:42 -0700 Subject: [PATCH 09/24] unify tls sections --- .../Compiler/DependencyAnalysis/ObjectNodeSection.cs | 9 +-------- .../Compiler/DependencyAnalysis/TlsRootNode.cs | 3 +-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs 
b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs index 0cd1a4bd707f10..4545833ab449c2 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs @@ -46,14 +46,7 @@ public bool IsStandardSection public static readonly ObjectNodeSection ReadOnlyDataSection = new ObjectNodeSection("rdata", SectionType.ReadOnly); public static readonly ObjectNodeSection FoldableReadOnlyDataSection = new ObjectNodeSection("rdata", SectionType.ReadOnly); public static readonly ObjectNodeSection TextSection = new ObjectNodeSection("text", SectionType.Executable); - - public static readonly ObjectNodeSection TLSSectionUnix = new ObjectNodeSection(".tdata", SectionType.Writeable); - - // UNDONE: section is irrelevant on Windows (typically .rdata) as long as the TLS template image is contiguous and - // referenced via _tls_used, _tls_start, _tls_end. - // not sure why the following "works". 
- public static readonly ObjectNodeSection TLSSectionWindows = new ObjectNodeSection("FIXME", SectionType.Writeable); - + public static readonly ObjectNodeSection TLSSection = new ObjectNodeSection("tdata", SectionType.Writeable); public static readonly ObjectNodeSection BssSection = new ObjectNodeSection("bss", SectionType.Uninitialized); public static readonly ObjectNodeSection HydrationTargetSection = new ObjectNodeSection("hydrated", SectionType.Uninitialized); public static readonly ObjectNodeSection ManagedCodeWindowsContentSection = new ObjectNodeSection(".managedcode$I", SectionType.Executable); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs index 6c1e473ed517a0..cea6c484d6d13e 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -17,8 +17,7 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); - public override ObjectNodeSection GetSection(NodeFactory factory) => - factory.Target.IsWindows ? 
ObjectNodeSection.TLSSectionWindows : ObjectNodeSection.TLSSectionUnix; + public override ObjectNodeSection GetSection(NodeFactory factory) => ObjectNodeSection.TLSSection; public override bool StaticDependenciesAreComputed => true; From 790dc9b06ceec472682515ab14c08b75386b4d0e Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 8 Jun 2023 13:45:04 -0700 Subject: [PATCH 10/24] inline TLS access on linux-x64 --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 2 +- .../Runtime/unix/unixasmmacrosamd64.inc | 7 ++- .../DependencyAnalysis/ObjectDataBuilder.cs | 2 + .../Compiler/DependencyAnalysis/Relocation.cs | 12 ++++- .../DependencyAnalysis/NodeFactory.cs | 3 +- .../Target_X64/X64ReadyToRunHelperNode.cs | 48 +++++++++++++++++-- .../DependencyAnalysis/TlsRootNode.cs | 4 +- 7 files changed, 67 insertions(+), 11 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 5224bdfff22374..ed38df9a55da0f 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -431,7 +431,7 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); #ifndef _MSC_VER __thread ThreadBuffer tls_CurrentThread; -#if defined(__APPLE__) +#if defined(__APPLE__) || defined(TARGET_ARM64) // the root of inlined threadstatics storage // eventually this will be emitted by ILC and we may have more than one such variable __thread InlinedThreadStaticRoot tls_InlinedThreadStatics; diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index 1aaf7c53ff6157..ab64ef9ce92b49 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -288,9 +288,12 @@ C_FUNC(\Name): movq _\Var@TLVP(%rip), %rdi callq *(%rdi) #else - leaq \Var@TLSLD(%rip), %rdi + .byte 0x66 // data16 prefix - padding to have space for linker 
relaxations + leaq \Var@TLSGD(%rip), %rdi + .byte 0x66 // + .byte 0x66 // + .byte 0x48 // rex.W prefix, also for padding callq __tls_get_addr@PLT - addq $\Var@DTPOFF, %rax #endif .intel_syntax noprefix .endm diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs index ff5cee89663c6b..4369699dcb9d12 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs @@ -291,6 +291,8 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0) case RelocType.IMAGE_REL_BASED_ABSOLUTE: case RelocType.IMAGE_REL_BASED_HIGHLOW: case RelocType.IMAGE_REL_SECREL: + case RelocType.IMAGE_REL_TLSGD: + case RelocType.IMAGE_REL_TPOFF: case RelocType.IMAGE_REL_FILE_ABSOLUTE: case RelocType.IMAGE_REL_BASED_ADDR32NB: case RelocType.IMAGE_REL_SYMBOL_SIZE: diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index 25b94a37654218..59811a9c52b1ba 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -23,16 +23,22 @@ public enum RelocType // This is a special NGEN-specific relocation type // for relative pointer (used to make NGen relocation // section smaller) - IMAGE_REL_SECREL = 0x80, // 32 bit offset from base of section containing target IMAGE_REL_BASED_ARM64_PAGEBASE_REL21 = 0x81, // ADRP IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A = 0x82, // ADD/ADDS (immediate) with zero shift, for page offset IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L = 0x83, // LDR (indexed, unsigned immediate), for page offset + // + // Relocation operators related to TLS access + // + IMAGE_REL_SECREL = 0x104, // 32 bit offset from base of section containing target + IMAGE_REL_TLSGD = 0x105, + 
IMAGE_REL_TPOFF = 0x106, + // // Relocations for R2R image production // - IMAGE_REL_SYMBOL_SIZE = 0x1000, // The size of data in the image represented by the target symbol node + IMAGE_REL_SYMBOL_SIZE = 0x1000, // The size of data in the image represented by the target symbol node IMAGE_REL_FILE_ABSOLUTE = 0x1001, // 32 bit offset from beginning of image } @@ -459,6 +465,8 @@ public static unsafe long ReadValue(RelocType relocType, void* location) case RelocType.IMAGE_REL_BASED_REL32: case RelocType.IMAGE_REL_BASED_RELPTR32: case RelocType.IMAGE_REL_SECREL: + case RelocType.IMAGE_REL_TLSGD: + case RelocType.IMAGE_REL_TPOFF: case RelocType.IMAGE_REL_FILE_ABSOLUTE: case RelocType.IMAGE_REL_SYMBOL_SIZE: return *(int*)location; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index c0f2150e020ce4..5dfaefca3a564c 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1303,7 +1303,8 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase graph.AddRoot(EagerCctorTable, "EagerCctorTable is always generated"); graph.AddRoot(TypeManagerIndirection, "TypeManagerIndirection is always generated"); - if (!_target.IsOSXLike && !_target.IsWindows) + if (_target.OperatingSystem == TargetOS.Linux && + _target.Architecture == TargetArchitecture.X64) { graph.AddRoot(TlsRoot, "TlsRoot is always generated"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs index c5868ccb215de7..867bd203b992aa 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ 
b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -229,11 +229,13 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter encoder, bo private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter encoder) { ISymbolNode getInlinedThreadStaticBaseSlow = factory.HelperEntrypoint(HelperEntrypoint.GetInlinedThreadStaticBaseSlow); + ISymbolNode tlsRoot = factory.TlsRoot; if (factory.Target.IsWindows) { - // TODO: VS this should be "factory.TlsRoot" - ISymbolNode tlsRoot = factory.ExternSymbol("tls_InlinedThreadStatics"); + // TODO: VS HACK until we emit proper TLS template on Windows + tlsRoot = factory.ExternSymbol("tls_InlinedThreadStatics"); + // TODO: VS can we know that we have a singlefile exe case? bool singleFileExe = false; if (singleFileExe) @@ -268,12 +270,52 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco encoder.Builder.EmitBytes(new byte[] { 0x48, 0x01, 0xC1 }); } - // mov rax, qword ptr[rcx] + // mov rax, qword ptr[rcx] encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8b, 0x01 }); encoder.EmitCompareToZero(Register.RAX); encoder.EmitJE(getInlinedThreadStaticBaseSlow); encoder.EmitRET(); } + else if (factory.Target.OperatingSystem == TargetOS.Linux) + { + // TODO: VS can we know that we have a singlefile exe case? 
+ bool singleFileExe = false; + if (singleFileExe) + { + // movq %fs:0x0,%rax + encoder.Builder.EmitBytes(new byte[] { 0x64, 0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 }); + + // leaq tlsRoot@TPOFF(%rax), %rdi + encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8D, 0xB8 }); + encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_TPOFF); + } + else + { + // data16 leaq tlsRoot@TLSGD(%rip), %rdi + encoder.Builder.EmitBytes(new byte[] { 0x66, 0x48, 0x8D, 0x3D }); + encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_TLSGD, -4); + + // data16 data16 rex.W callq __tls_get_addr@PLT + encoder.Builder.EmitBytes(new byte[] { 0x66, 0x66, 0x48, 0xE8 }); + encoder.Builder.EmitReloc(factory.ExternSymbol("__tls_get_addr"), RelocType.IMAGE_REL_BASED_REL32, -4); + + encoder.EmitMOV(Register.RDI, Register.RAX); + } + + // mov rax, qword ptr[rdi] + encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8B, 0x07 }); + encoder.EmitCompareToZero(Register.RAX); + encoder.EmitJE(getInlinedThreadStaticBaseSlow); + encoder.EmitRET(); + } + else if (factory.Target.IsOSXLike) + { + // movq _\Var @TLVP(% rip), % rdi + // callq * (% rdi) + + ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase"); + encoder.EmitJMP(helper); + } else { ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase"); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs index cea6c484d6d13e..be4e7e58db9686 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -13,7 +13,7 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) sb.Append(nameMangler.CompilationUnitPrefix).Append("tls_InlinedThreadStatics"); } public int Offset => 0; - public override bool IsShareable => true; + public override bool 
IsShareable => false; protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); @@ -24,8 +24,8 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) { ObjectDataBuilder objData = new ObjectDataBuilder(factory, relocsOnly); - objData.AddSymbol(this); objData.RequireInitialPointerAlignment(); + objData.AddSymbol(this); // root objData.EmitZeroPointer(); From 7a4d9038314a39af5ab2a2c2a9a261078dbc736c Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Fri, 9 Jun 2023 13:16:21 -0700 Subject: [PATCH 11/24] no need for RhpGetInlinedThreadStaticBase when inlining the access --- src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S | 5 ++++- .../nativeaot/Runtime/amd64/MiscStubs.asm | 16 ---------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S index c2247d8be5e24c..e48f96b22722ea 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S @@ -45,7 +45,9 @@ LOCAL_LABEL(ProbeLoop): ret NESTED_END RhpStackProbe, _TEXT -#ifndef TARGET_ANDROID +// this helper is a last resort fallback for platforms that do not inline TLS access sequences +#if !defined(TARGET_LINUX) + NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler // On exit: // rax - the thread static base for the given type @@ -62,4 +64,5 @@ NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler // return it ret NESTED_END RhpGetInlinedThreadStaticBase, _TEXT + #endif diff --git a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm index c4f39f51345585..f66decc97738eb 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm @@ -39,20 +39,4 @@ 
ProbeLoop: LEAF_END RhpStackProbe, _TEXT -LEAF_ENTRY RhpGetInlinedThreadStaticBase, _TEXT - ; On exit: - ; rax - the thread static base for the given type - - ;; rcx = &tls_InlinedThreadStatics, TRASHES r8 - INLINE_GET_TLS_VAR rcx, r8, tls_InlinedThreadStatics - - ;; get per-thread storage - mov rax, [rcx] - test rax, rax - jz RhpGetInlinedThreadStaticBaseSlow ;; rcx contains the storage ref - - ;; return it - ret -LEAF_END RhpGetInlinedThreadStaticBase, _TEXT - end From a585ff6c312f0a0379b74ecc051b96883ba4f45f Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:04:41 -0700 Subject: [PATCH 12/24] some comments and TODOs --- .../Target_X64/X64ReadyToRunHelperNode.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs index 867bd203b992aa..ea4ad09cf43679 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -226,18 +226,19 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter encoder, bo } } + // emits code that results in ThreadStaticBase referenced in RAX. + // may trash volatile registers. 
(there are calls to the slow helper and possibly to platform's TLS support) private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter encoder) { ISymbolNode getInlinedThreadStaticBaseSlow = factory.HelperEntrypoint(HelperEntrypoint.GetInlinedThreadStaticBaseSlow); ISymbolNode tlsRoot = factory.TlsRoot; + bool singleFileExe = factory.CompilationModuleGroup.IsSingleFileCompilation; if (factory.Target.IsWindows) { - // TODO: VS HACK until we emit proper TLS template on Windows + // TODO: VS this is a temporary HACK. + // until we emit proper TLS template on Windows, we will emit one in c++ and bind to that. tlsRoot = factory.ExternSymbol("tls_InlinedThreadStatics"); - - // TODO: VS can we know that we have a singlefile exe case? - bool singleFileExe = false; if (singleFileExe) { // mov rax,qword ptr gs:[58h] @@ -278,8 +279,6 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco } else if (factory.Target.OperatingSystem == TargetOS.Linux) { - // TODO: VS can we know that we have a singlefile exe case? 
- bool singleFileExe = false; if (singleFileExe) { // movq %fs:0x0,%rax From 62b60de71f7e066b75878877faa96ae831b28143 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Fri, 9 Jun 2023 16:18:00 -0700 Subject: [PATCH 13/24] enable ILC generation of tls_InlinedThreadStatics on win-x64 --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 5 ++--- .../Compiler/DependencyAnalysis/NodeFactory.cs | 4 ++-- .../DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs | 3 --- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index ed38df9a55da0f..81f9d5ad7dc682 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -437,9 +437,8 @@ __thread ThreadBuffer tls_CurrentThread; __thread InlinedThreadStaticRoot tls_InlinedThreadStatics; #endif -#else -// the root of inlined threadstatics storage -// there is only one now, +#elif defined(TARGET_ARM64) +// the root of inlined threadstatics storage for ARM64 // eventually this will be emitted by ILC and we may have more than one such variable EXTERN_C __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index 5dfaefca3a564c..9ec043d4eca706 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1303,8 +1303,8 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase graph.AddRoot(EagerCctorTable, "EagerCctorTable is always generated"); graph.AddRoot(TypeManagerIndirection, "TypeManagerIndirection is always generated"); - if 
(_target.OperatingSystem == TargetOS.Linux && - _target.Architecture == TargetArchitecture.X64) + if (_target.Architecture == TargetArchitecture.X64 && + (_target.OperatingSystem == TargetOS.Linux || _target.OperatingSystem == TargetOS.Windows)) { graph.AddRoot(TlsRoot, "TlsRoot is always generated"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs index ea4ad09cf43679..2f26041c421feb 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -236,9 +236,6 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco if (factory.Target.IsWindows) { - // TODO: VS this is a temporary HACK. - // until we emit proper TLS template on Windows, we will emit one in c++ and bind to that. - tlsRoot = factory.ExternSymbol("tls_InlinedThreadStatics"); if (singleFileExe) { // mov rax,qword ptr gs:[58h] From 2921dc48d401f01577f623a58f2fcb736688b026 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 11 Jun 2023 23:21:12 -0700 Subject: [PATCH 14/24] allow storage inlining in multimodule case when TLS access is inlined --- src/coreclr/tools/aot/ILCompiler/Program.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs index ae5e9f1fbf53c1..253527f1c9c128 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -494,7 +494,9 @@ void RunScanner() // If we have a scanner, we can inline threadstatics storage using the information // we collected at scanning time. 
// Inlined storage implies a single type manager, thus we do not do it in multifile case. - if (!multiFile && !Get(_command.NoInlineTls)) + // Linux and Windows on x64 no longer have this limitation since TLS access is inlined into the calling module. + if (!multiFile || + (targetArchitecture == TargetArchitecture.X64 && (targetOS == TargetOS.Windows || targetOS == TargetOS.Linux))) { builder.UseInlinedThreadStatics(scanResults.GetInlinedThreadStatics()); } From 97cf520e38b83164d01eefad37064ae46dd05a2c Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 12 Jun 2023 00:21:44 -0700 Subject: [PATCH 15/24] disable "Initial Exec" optimizations --- .../Target_X64/X64ReadyToRunHelperNode.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs index 2f26041c421feb..150643a9765787 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -232,7 +232,10 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco { ISymbolNode getInlinedThreadStaticBaseSlow = factory.HelperEntrypoint(HelperEntrypoint.GetInlinedThreadStaticBaseSlow); ISymbolNode tlsRoot = factory.TlsRoot; - bool singleFileExe = factory.CompilationModuleGroup.IsSingleFileCompilation; + // IsSingleFileCompilation is not enough to guarantee that we can use "Initial Executable" optimizations. + // we need a special compiler flag analogous to /GA. Just assume "false" for now. 
+ // bool singleFileExe = factory.CompilationModuleGroup.IsSingleFileCompilation; + bool singleFileExe = false; if (factory.Target.IsWindows) { @@ -293,7 +296,7 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco // data16 data16 rex.W callq __tls_get_addr@PLT encoder.Builder.EmitBytes(new byte[] { 0x66, 0x66, 0x48, 0xE8 }); - encoder.Builder.EmitReloc(factory.ExternSymbol("__tls_get_addr"), RelocType.IMAGE_REL_BASED_REL32, -4); + encoder.Builder.EmitReloc(factory.ExternSymbol("__tls_get_addr"), RelocType.IMAGE_REL_BASED_REL32); encoder.EmitMOV(Register.RDI, Register.RAX); } From 8dfa7e996bd6ec652165435660a53bb04c56fead Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 14 Jun 2023 11:27:19 -0700 Subject: [PATCH 16/24] some comments and formatting --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 7 +++---- .../tools/Common/Compiler/DependencyAnalysis/Relocation.cs | 2 +- .../Compiler/DependencyAnalysis/TlsRootNode.cs | 1 - .../DependencyAnalysis/TypeThreadStaticIndexNode.cs | 7 ++++--- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 81f9d5ad7dc682..2562b3067adbc6 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -431,15 +431,14 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); #ifndef _MSC_VER __thread ThreadBuffer tls_CurrentThread; +// tls_InlinedThreadStatics should be emitted by ILC and that is already the case for win-x64 and linux-x64 +// the remaining platforms use tls_InlinedThreadStatics defined here until + #if defined(__APPLE__) || defined(TARGET_ARM64) -// the root of inlined threadstatics storage -// eventually this will be emitted by ILC and we may have more than one such variable __thread InlinedThreadStaticRoot tls_InlinedThreadStatics; #endif #elif defined(TARGET_ARM64) -// the root of inlined 
threadstatics storage for ARM64 -// eventually this will be emitted by ILC and we may have more than one such variable EXTERN_C __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; #endif diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index 59811a9c52b1ba..95d86d7024368c 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -38,7 +38,7 @@ public enum RelocType // // Relocations for R2R image production // - IMAGE_REL_SYMBOL_SIZE = 0x1000, // The size of data in the image represented by the target symbol node + IMAGE_REL_SYMBOL_SIZE = 0x1000, // The size of data in the image represented by the target symbol node IMAGE_REL_FILE_ABSOLUTE = 0x1001, // 32 bit offset from beginning of image } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs index be4e7e58db9686..d8a0fa1accedcd 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -36,7 +36,6 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) return objData.ToObjectData(); } - // TODO: VS where this should come from? 
public override int ClassCode => -985742028; } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs index 9a5d44d3580cc8..f91853d1834014 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs @@ -83,11 +83,12 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo } } - // needed to construct storage. + // needed to construct storage objData.EmitPointerReloc(factory.TypeManagerIndirection); - // tls storage ID for uninlined types. used to get the type from the type manager - // and slot from the array + // tls storage ID for uninlined types. used to: + // - get the type from the type manager + // - get the slot from the per-type storage array objData.EmitNaturalInt(typeTlsIndex); return objData.ToObjectData(); From cdfd0a34c9a15d6c0b80815fc633e416842e4a23 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 14 Jun 2023 16:20:24 -0700 Subject: [PATCH 17/24] follow up change that was suggested in the previous PR --- .../TypeSystem/Common/Utilities/GCPointerMap.Algorithm.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/Common/TypeSystem/Common/Utilities/GCPointerMap.Algorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/Utilities/GCPointerMap.Algorithm.cs index fd21b72a066a82..2907ba26b6cf27 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/Utilities/GCPointerMap.Algorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/Utilities/GCPointerMap.Algorithm.cs @@ -101,7 +101,7 @@ public static GCPointerMap FromStaticLayout(MetadataType type) return builder.ToGCMap(); } - private static void MapThreadStaticsForType(GCPointerMapBuilder 
builder, MetadataType type, int baseOffset) + private static void MapThreadStaticsForType(ref GCPointerMapBuilder builder, MetadataType type, int baseOffset) { foreach (FieldDesc field in type.GetFields()) { @@ -133,7 +133,7 @@ public static GCPointerMap FromThreadStaticLayout(MetadataType type) { GCPointerMapBuilder builder = new GCPointerMapBuilder(type.ThreadGcStaticFieldSize.AsInt, type.Context.Target.PointerSize); - MapThreadStaticsForType(builder, type, baseOffset: 0); + MapThreadStaticsForType(ref builder, type, baseOffset: 0); Debug.Assert(builder.ToGCMap().Size * type.Context.Target.PointerSize >= type.ThreadGcStaticFieldSize.AsInt); return builder.ToGCMap(); @@ -148,7 +148,7 @@ public static GCPointerMap FromInlinedThreadStatics( GCPointerMapBuilder builder = new GCPointerMapBuilder(threadStaticSize, pointerSize); foreach (var type in types) { - MapThreadStaticsForType(builder, type, offsets[type]); + MapThreadStaticsForType(ref builder, type, offsets[type]); } return builder.ToGCMap(); From b650cbbc16234a6ef507a6e8e00ae2eb19249c68 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 15 Jun 2023 11:46:07 -0700 Subject: [PATCH 18/24] Remove use of RhpGetInlinedThreadstaticBase on x64 --- .../nativeaot/Runtime/amd64/MiscStubs.S | 22 --------------- .../nativeaot/Runtime/amd64/MiscStubs.asm | 2 -- .../nativeaot/Runtime/arm64/MiscStubs.S | 26 ------------------ .../nativeaot/Runtime/arm64/MiscStubs.asm | 27 ------------------- .../src/Internal/Runtime/ThreadStatics.cs | 1 - .../DependencyAnalysis/NodeFactory.cs | 8 +----- .../Target_X64/X64ReadyToRunHelperNode.cs | 6 ++--- src/coreclr/tools/aot/ILCompiler/Program.cs | 9 +++---- 8 files changed, 6 insertions(+), 95 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S index e48f96b22722ea..34acbfe3795e81 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S +++ 
b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S @@ -44,25 +44,3 @@ LOCAL_LABEL(ProbeLoop): RESET_FRAME_WITH_RBP ret NESTED_END RhpStackProbe, _TEXT - -// this helper is a last resort fallback for platforms that do not inline TLS access sequences -#if !defined(TARGET_LINUX) - -NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler - // On exit: - // rax - the thread static base for the given type - - // rdi = &tls_InlinedThreadStatics - INLINE_GET_TLS_VAR tls_InlinedThreadStatics - mov rdi, rax - - // get per-thread storage - mov rax, [rdi] - test rax, rax - jz C_FUNC(RhpGetInlinedThreadStaticBaseSlow) // rdi contains the storage ref - - // return it - ret -NESTED_END RhpGetInlinedThreadStaticBase, _TEXT - -#endif diff --git a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm index f66decc97738eb..c3eb1fc2964eae 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm @@ -3,8 +3,6 @@ include AsmMacros.inc -EXTERN RhpGetInlinedThreadStaticBaseSlow : PROC - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; The following helper will access ("probe") a word on each page of the stack ; starting with the page right beneath rsp down to the one pointed to by r11. diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index 34af83ccf0b985..e69de29bb2d1d6 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -1,26 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include -#include "AsmOffsets.inc" - -#ifndef TARGET_ANDROID -NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler - // On exit: - // x0 - the thread static base for the given type - - // x1 = GetThread() - INLINE_GET_TLS_VAR x1, C_FUNC(tls_InlinedThreadStatics) - - // get per-thread storage - ldr x0, [x1] - cbnz x0, HaveValue - mov x0, x1 - b C_FUNC(RhpGetInlinedThreadStaticBaseSlow) - -HaveValue: - // return it - ret - -NESTED_END RhpGetInlinedThreadStaticBase, _TEXT -#endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index cdb076b3d56b26..e69de29bb2d1d6 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -1,27 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -#include "AsmMacros.h" - - EXTERN RhpGetInlinedThreadStaticBaseSlow - - TEXTAREA - -;; On exit: -;; x0 - the thread static base for the given type - LEAF_ENTRY RhpGetInlinedThreadStaticBase - ;; x1 = &tls_InlinedThreadStatics, TRASHES x2 - INLINE_GET_TLS_VAR x1, x2, tls_InlinedThreadStatics - - ;; get per-thread storage - ldr x0, [x1] - cbnz x0, HaveValue - mov x0, x1 - b RhpGetInlinedThreadStaticBaseSlow - -HaveValue - ;; return it - ret - LEAF_END RhpGetInlinedThreadStaticBase - - end diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs index 1db0d4c4b8a638..7acd6460acedd3 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs @@ -32,7 +32,6 @@ internal static unsafe object GetThreadStaticBaseForType(TypeManagerSlot* pModul return GetUninlinedThreadStaticBaseForType(pModuleData, typeTlsIndex); } - 
[RuntimeExport("RhpGetInlinedThreadStaticBaseSlow")] internal static unsafe object GetInlinedThreadStaticBaseSlow(ref object? threadStorage) { Debug.Assert(threadStorage == null); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index 9ec043d4eca706..eeefe7dffb4d05 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1302,13 +1302,6 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase graph.AddRoot(ThreadStaticsRegion, "ThreadStaticsRegion is always generated"); graph.AddRoot(EagerCctorTable, "EagerCctorTable is always generated"); graph.AddRoot(TypeManagerIndirection, "TypeManagerIndirection is always generated"); - - if (_target.Architecture == TargetArchitecture.X64 && - (_target.OperatingSystem == TargetOS.Linux || _target.OperatingSystem == TargetOS.Windows)) - { - graph.AddRoot(TlsRoot, "TlsRoot is always generated"); - } - graph.AddRoot(FrozenSegmentRegion, "FrozenSegmentRegion is always generated"); graph.AddRoot(InterfaceDispatchCellSection, "Interface dispatch cell section is always generated"); graph.AddRoot(ModuleInitializerList, "Module initializer list is always generated"); @@ -1316,6 +1309,7 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase if (_inlinedThreadStatics.IsComputed()) { graph.AddRoot(_inlinedThreadStatiscNode, "Inlined threadstatics are used if present"); + graph.AddRoot(TlsRoot, "nlined threadstatics are used if present"); } ReadyToRunHeader.Add(ReadyToRunSectionType.GCStaticRegion, GCStaticsRegion); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs 
index 150643a9765787..a345294b60f3fb 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -312,13 +312,11 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco // movq _\Var @TLVP(% rip), % rdi // callq * (% rdi) - ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase"); - encoder.EmitJMP(helper); + throw new NotImplementedException(); } else { - ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase"); - encoder.EmitJMP(helper); + throw new NotImplementedException(); } } } diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs index 253527f1c9c128..acc940a6df43e2 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -491,12 +491,9 @@ void RunScanner() builder.UsePreinitializationManager(preinitManager); } - // If we have a scanner, we can inline threadstatics storage using the information - // we collected at scanning time. - // Inlined storage implies a single type manager, thus we do not do it in multifile case. - // Linux and Windows on x64 no longer have this limitation since TLS access is inlined into the calling module. - if (!multiFile || - (targetArchitecture == TargetArchitecture.X64 && (targetOS == TargetOS.Windows || targetOS == TargetOS.Linux))) + // If we have a scanner, we can inline threadstatics storage using the information we collected at scanning time. + // This could be a command line switch if we really wanted to. 
+ if ((targetArchitecture == TargetArchitecture.X64 && (targetOS == TargetOS.Windows || targetOS == TargetOS.Linux))) { builder.UseInlinedThreadStatics(scanResults.GetInlinedThreadStatics()); } From f9c5f01a5b9435b30c3db8d5eec87281940545dd Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 15 Jun 2023 11:51:15 -0700 Subject: [PATCH 19/24] Remove use of RhpGetInlinedThreadStaticBase on arm64 --- .../Target_ARM64/ARM64Emitter.cs | 9 +++++++ .../Target_ARM64/ARM64ReadyToRunHelperNode.cs | 26 ++++++++++++------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs index 45b5018b58fa8b..87b92d838970f3 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs @@ -189,6 +189,15 @@ public void EmitJE(ISymbolNode symbol) EmitJMP(symbol); } + public void EmitJNE(ISymbolNode symbol) + { + uint offset = symbol.RepresentsIndirectionCell ? 
6u : 2u; + + Builder.EmitUInt(0b01010100_0000000000000000000_0_0000u | offset << 5); + + EmitJMP(symbol); + } + private static bool InSignedByteRange(int i) { return i == (int)(sbyte)i; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs index 8e43f126e0cc74..24d3222e51fc27 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs @@ -74,26 +74,25 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, ISortableSymbolNode index = factory.TypeThreadStaticIndex(target); if (index is TypeThreadStaticIndexNode ti && ti.Type == null) { - ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase"); - if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) { - encoder.EmitJMP(helper); + EmitInlineTLSAccess(factory, ref encoder); } else { + // First arg: unused address of the TypeManager + // encoder.EmitMOV(encoder.TargetRegister.Arg0, (ushort)0); + + // Second arg: ~0 (index of inlined storage) + encoder.EmitMVN(encoder.TargetRegister.Arg1, 0); + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); - encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2); encoder.EmitCMP(encoder.TargetRegister.Arg3, 0); - encoder.EmitJE(helper); - // First arg: unused address of the TypeManager - encoder.EmitMOV(encoder.TargetRegister.Arg0, (ushort)0); - // Second arg: ~0 (index of inlined storage) - encoder.EmitMVN(encoder.TargetRegister.Arg1, 0); - 
encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + encoder.EmitJNE(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + EmitInlineTLSAccess(factory, ref encoder); } } else @@ -227,5 +226,12 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, throw new NotImplementedException(); } } + + // emits code that results in ThreadStaticBase referenced in X0. + // may trash volatile registers. (there are calls to the slow helper and possibly to the platform's TLS support) + private static void EmitInlineTLSAccess(NodeFactory factory, ref ARM64Emitter encoder) + { + throw new NotImplementedException(); + } } } From bca1c3b352ac6e911118e1577bdd64d166d2c6c3 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 15 Jun 2023 11:54:34 -0700 Subject: [PATCH 20/24] removed tls_InlinedThreadStatics --- src/coreclr/nativeaot/Runtime/threadstore.cpp | 11 ----------- .../Target_X64/X64ReadyToRunHelperNode.cs | 4 ++-- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 2562b3067adbc6..67a6949fd7fb06 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -430,17 +430,6 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); #ifndef _MSC_VER __thread ThreadBuffer tls_CurrentThread; - -// tls_InlinedThreadStatics should be emitted by ILC and that is already the case for win-x64 and linux-x64 -// the remaining platforms use tls_InlinedThreadStatics defined here until - -#if defined(__APPLE__) || defined(TARGET_ARM64) -__thread InlinedThreadStaticRoot tls_InlinedThreadStatics; -#endif - -#elif defined(TARGET_ARM64) -EXTERN_C __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics; -__declspec(thread) InlinedThreadStaticRoot 
tls_InlinedThreadStatics; #endif EXTERN_C ThreadBuffer* RhpGetThread() diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs index a345294b60f3fb..c9dd7b5841c71a 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -244,7 +244,7 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco // mov rax,qword ptr gs:[58h] encoder.Builder.EmitBytes(new byte[] { 0x65, 0x48, 0x8B, 0x04, 0x25, 0x58, 0x00, 0x00, 0x00 }); - // mov ecx,[tls_InlinedThreadStatics] + // mov ecx, SECTIONREL tlsRoot encoder.Builder.EmitBytes(new byte[] { 0xB9 }); encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_SECREL); @@ -263,7 +263,7 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco // mov rax,qword ptr [rax+rcx*8] encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8B, 0x04, 0xC8 }); - // mov ecx,[tls_InlinedThreadStatics] + // mov ecx, SECTIONREL tlsRoot encoder.Builder.EmitBytes(new byte[] { 0xB9 }); encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_SECREL); From ff29a9352e09c56b9f19cd431aa245c7c304c156 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 15 Jun 2023 14:22:47 -0700 Subject: [PATCH 21/24] a few cleanups/typos --- src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S | 5 +++++ src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm | 8 ++++++++ .../Compiler/DependencyAnalysis/NodeFactory.cs | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index e69de29bb2d1d6..ea5d91a1a1c1f9 100644 --- 
a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -0,0 +1,5 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include <unixasmmacros.inc> +#include "AsmOffsets.inc" diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index e69de29bb2d1d6..49baea4977259b 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -0,0 +1,8 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + end diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs index eeefe7dffb4d05..6f17af2ddf3b8b 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs @@ -1309,7 +1309,7 @@ public virtual void AttachToDependencyGraph(DependencyAnalyzerBase if (_inlinedThreadStatics.IsComputed()) { graph.AddRoot(_inlinedThreadStatiscNode, "Inlined threadstatics are used if present"); - graph.AddRoot(TlsRoot, "nlined threadstatics are used if present"); + graph.AddRoot(TlsRoot, "Inlined threadstatics are used if present"); } ReadyToRunHeader.Add(ReadyToRunSectionType.GCStaticRegion, GCStaticsRegion); From 60658636a5af8a81498fe2224c10a0c8ea8942a3 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 18 Jun 2023 19:36:23 -0700 Subject: [PATCH 22/24] fix after rebase --- src/coreclr/tools/aot/ILCompiler/Program.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs
b/src/coreclr/tools/aot/ILCompiler/Program.cs index acc940a6df43e2..22e0d78f45f757 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -492,8 +492,8 @@ void RunScanner() } // If we have a scanner, we can inline threadstatics storage using the information we collected at scanning time. - // This could be a command line switch if we really wanted to. - if ((targetArchitecture == TargetArchitecture.X64 && (targetOS == TargetOS.Windows || targetOS == TargetOS.Linux))) + if (!Get(_command.NoInlineTls) && + (targetArchitecture == TargetArchitecture.X64 && (targetOS == TargetOS.Windows || targetOS == TargetOS.Linux))) { builder.UseInlinedThreadStatics(scanResults.GetInlinedThreadStatics()); } From e48b82b7cea74f9eef04f0fe06010f71526e9cda Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 19 Jun 2023 15:21:50 -0700 Subject: [PATCH 23/24] inlined TLS support for linux-arm64 --- .../DependencyAnalysis/ObjectDataBuilder.cs | 8 +++ .../Compiler/DependencyAnalysis/Relocation.cs | 32 ++++++++- .../Target_ARM64/ARM64Emitter.cs | 8 +++ .../DependencyAnalysis/ObjectWriter.cs | 7 ++ .../Target_ARM64/ARM64ReadyToRunHelperNode.cs | 71 ++++++++++++++++++- src/coreclr/tools/aot/ILCompiler/Program.cs | 2 +- 6 files changed, 125 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs index 4369699dcb9d12..67c9931305c132 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs @@ -307,6 +307,14 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0) case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L: case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: + + case 
RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21: + case RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12: + case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12: + case RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL: + case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12: + case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC: case RelocType.IMAGE_REL_BASED_LOONGARCH64_JIR: Debug.Assert(delta == 0); diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index 95d86d7024368c..b833c909ff5b79 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -31,10 +31,26 @@ public enum RelocType // // Relocation operators related to TLS access // - IMAGE_REL_SECREL = 0x104, // 32 bit offset from base of section containing target + + // Windows x64 + IMAGE_REL_SECREL = 0x104, + + // Linux x64 + // GD model IMAGE_REL_TLSGD = 0x105, + // LE model IMAGE_REL_TPOFF = 0x106, + // Linux arm64 + // TLSDESC (dynamic) + IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21 = 0x107, + IMAGE_REL_AARCH64_TLSDESC_LD64_LO12 = 0x108, + IMAGE_REL_AARCH64_TLSDESC_ADD_LO12 = 0x109, + IMAGE_REL_AARCH64_TLSDESC_CALL = 0x10A, + // LE model + IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12 = 0x10B, + IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC = 0x10C, + // // Relocations for R2R image production // @@ -483,6 +499,20 @@ public static unsafe long ReadValue(RelocType relocType, void* location) return GetArm64Rel21((uint*)location); case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: return GetArm64Rel12((uint*)location); + case RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12: + case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12: + case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12: + case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + // TLS relocs do not have offsets + 
Debug.Assert((GetArm64Rel12((uint*)location) & 0xFF) == 0); + return 0; + case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21: + // TLS relocs do not have offsets + Debug.Assert((GetArm64Rel21((uint*)location) & 0xFF) == 0); + return 0; + case RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL: + // TLS relocs do not have offsets + return 0; case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC: return (long)GetLoongArch64PC12((uint*)location); case RelocType.IMAGE_REL_BASED_LOONGARCH64_JIR: diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs index 87b92d838970f3..d516b578f83a11 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs @@ -176,10 +176,18 @@ public void EmitRET() public void EmitRETIfEqual() { + // b.ne #8 Builder.EmitUInt(0b01010100_0000000000000000010_0_0001u); EmitRET(); } + public void EmitRETIfNotEqual() + { + // b.eq #8 + Builder.EmitUInt(0b01010100_0000000000000000010_0_0000u); + EmitRET(); + } + public void EmitJE(ISymbolNode symbol) { uint offset = symbol.RepresentsIndirectionCell ? 
6u : 2u; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs index fb6adbae6840a4..9e7241048ff997 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs @@ -1079,6 +1079,13 @@ public static void EmitObject(string objectFilePath, IReadOnlyCollection Date: Fri, 23 Jun 2023 07:35:20 -0700 Subject: [PATCH 24/24] PR feedback --- src/coreclr/nativeaot/Runtime.Base/src/System/Object.cs | 3 --- .../src/Internal/Runtime/ThreadStatics.cs | 2 +- .../Target_ARM64/ARM64ReadyToRunHelperNode.cs | 6 +++--- .../Target_X64/X64ReadyToRunHelperNode.cs | 8 ++++---- .../Compiler/DependencyAnalysis/TlsRootNode.cs | 1 - 5 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Object.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Object.cs index b7e3d15bf6e8a1..08d6279a064bc9 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Object.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Object.cs @@ -45,9 +45,6 @@ internal MethodTable* MethodTable } } - [Runtime.CompilerServices.Intrinsic] - internal static extern MethodTable* MethodTableOf(); - internal EETypePtr EETypePtr { get diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs index 7acd6460acedd3..7bfcbff517b22f 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs @@ -36,7 +36,7 @@ internal static unsafe object GetInlinedThreadStaticBaseSlow(ref object? 
threadS { Debug.Assert(threadStorage == null); // Allocate an object that will represent a memory block for all thread static fields - TypeManagerHandle typeManager = (new object()).GetMethodTable()->TypeManager; + TypeManagerHandle typeManager = EETypePtr.EETypePtrOf<object>().ToPointer()->TypeManager; object threadStaticBase = AllocateThreadStaticStorageForType(typeManager, 0); // register the storage location with the thread for GC reporting. diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs index 5e6157a904294b..83c2b3db3bab22 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs @@ -235,12 +235,12 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref ARM64Emitter en ISymbolNode tlsRoot = factory.TlsRoot; // IsSingleFileCompilation is not enough to guarantee that we can use "Initial Executable" optimizations. // we need a special compiler flag analogous to /GA. Just assume "false" for now.
- // bool singleFileExe = factory.CompilationModuleGroup.IsSingleFileCompilation; - bool singleFileExe = false; + // bool isInitialExecutable = factory.CompilationModuleGroup.IsSingleFileCompilation; + bool isInitialExecutable = false; if (factory.Target.OperatingSystem == TargetOS.Linux) { - if (singleFileExe) + if (isInitialExecutable) { // mrs x0, tpidr_el0 encoder.Builder.EmitUInt(0xd53bd040); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs index c9dd7b5841c71a..8fbd5f609b2546 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs @@ -234,12 +234,12 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco ISymbolNode tlsRoot = factory.TlsRoot; // IsSingleFileCompilation is not enough to guarantee that we can use "Initial Executable" optimizations. // we need a special compiler flag analogous to /GA. Just assume "false" for now. 
- // bool singleFileExe = factory.CompilationModuleGroup.IsSingleFileCompilation; - bool singleFileExe = false; + // bool isInitialExecutable = factory.CompilationModuleGroup.IsSingleFileCompilation; + bool isInitialExecutable = false; if (factory.Target.IsWindows) { - if (singleFileExe) + if (isInitialExecutable) { // mov rax,qword ptr gs:[58h] encoder.Builder.EmitBytes(new byte[] { 0x65, 0x48, 0x8B, 0x04, 0x25, 0x58, 0x00, 0x00, 0x00 }); @@ -279,7 +279,7 @@ private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter enco } else if (factory.Target.OperatingSystem == TargetOS.Linux) { - if (singleFileExe) + if (isInitialExecutable) { // movq %fs:0x0,%rax encoder.Builder.EmitBytes(new byte[] { 0x64, 0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 }); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs index d8a0fa1accedcd..424ba7130f0df7 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs @@ -9,7 +9,6 @@ public class TlsRootNode : ObjectNode, ISymbolDefinitionNode { public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) { - // tls_InlinedThreadStatics sb.Append(nameMangler.CompilationUnitPrefix).Append("tls_InlinedThreadStatics"); } public int Offset => 0;