-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce paged vector #66430
Introduce paged vector #66430
Conversation
@llvm/pr-subscribers-llvm-adt @llvm/pr-subscribers-clang-modules ChangesNone -- Full diff: https://github.com//pull/66430.diff5 Files Affected:
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f3327..b1942a3d86afc37 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -43,6 +43,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/PagedVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> { /// /// Negative FileIDs are indexes into this table. To get from ID to an index, /// use (-ID - 2). - SmallVector<SrcMgr::SLocEntry, 0> LoadedSLocEntryTable; + PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable; /// The starting offset of the next local SLocEntry. /// diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801fe..567aecc8246e761 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -38,6 +38,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PagedVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -487,7 +488,7 @@ class ASTReader /// /// When the pointer at index I is non-NULL, the type with /// ID = (I + 1) << FastQual::Width has already been loaded - std::vector<QualType> TypesLoaded; + PagedVector<QualType> TypesLoaded; using GlobalTypeMapType = ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>; @@ -501,7 +502,7 @@ class ASTReader /// /// When the pointer at index I is non-NULL, the declaration with ID /// = I + 1 has already been loaded. - std::vector<Decl *> DeclsLoaded; + PagedVector<Decl *> DeclsLoaded; using GlobalDeclMapType = ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339ab..c028afe63ac85ad 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -458,7 +458,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries, CurrentLoadedOffset - TotalSize < NextLocalOffset) { return std::make_pair(0, 0); } - LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries); + LoadedSLocEntryTable.expand(LoadedSLocEntryTable.size() + NumSLocEntries); SLocEntryLoaded.resize(LoadedSLocEntryTable.size()); CurrentLoadedOffset -= TotalSize; int ID = LoadedSLocEntryTable.size(); @@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const { } size_t SourceManager::getDataStructureSizes() const { - size_t size = llvm::capacity_in_bytes(MemBufferInfos) - + llvm::capacity_in_bytes(LocalSLocEntryTable) - + llvm::capacity_in_bytes(LoadedSLocEntryTable) - + llvm::capacity_in_bytes(SLocEntryLoaded) - + llvm::capacity_in_bytes(FileInfos); + size_t size = llvm::capacity_in_bytes(MemBufferInfos) + + llvm::capacity_in_bytes(LocalSLocEntryTable) + + llvm::capacity_in_bytes(LoadedSLocEntryTable.materialised()) + + llvm::capacity_in_bytes(SLocEntryLoaded) + + llvm::capacity_in_bytes(FileInfos); if (OverriddenFilesInfo) size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0952244d037a77c..18b1a9a2480a73e 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3260,7 +3260,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, std::make_pair(LocalBaseTypeIndex, F.BaseTypeIndex - LocalBaseTypeIndex)); - TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes); + TypesLoaded.expand(TypesLoaded.size() + F.LocalNumTypes); } break; } @@ -3290,7 +3290,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, // module. F.GlobalToLocalDeclIDs[&F] = LocalBaseDeclID; - DeclsLoaded.resize(DeclsLoaded.size() + F.LocalNumDecls); + DeclsLoaded.expand(DeclsLoaded.size() + F.LocalNumDecls); } break; } @@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() { std::fprintf(stderr, "*** AST File Statistics:\n"); unsigned NumTypesLoaded = - TypesLoaded.size() - llvm::count(TypesLoaded, QualType()); + TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType()); unsigned NumDeclsLoaded = - DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr); + DeclsLoaded.size() - + llvm::count(DeclsLoaded.materialised(), (Decl *)nullptr); unsigned NumIdentifiersLoaded = IdentifiersLoaded.size() - llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr); diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h new file mode 100644 index 000000000000000..dab0d249aa706e4 --- /dev/null +++ b/llvm/include/llvm/ADT/PagedVector.h @@ -0,0 +1,96 @@ +//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors --------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PagedVector class. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_ADT_PAGEDVECTOR_H +#define LLVM_ADT_PAGEDVECTOR_H + +#include <vector> + +// Notice that this does not have iterators, because if you +// have iterators it probably means you are going to touch +// all the memory in any case, so better use a std::vector in +// the first place. +template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector { + size_t Size = 0; + // Index of where to find a given page in the data + mutable std::vector<int> Lookup; + // Actual data + mutable std::vector<T> Data; + +public: + // Add a range to the vector. + // When vector is accessed, it will call the callback to fill the range + // with data. + + // Lookup an element at position i. + // If the given range is not filled, it will be filled. + // If the given range is filled, return the element. + T &operator[](int Index) const { return at(Index); } + + T &at(int Index) const { + auto &PageId = Lookup[Index / PAGE_SIZE]; + // If the range is not filled, fill it + if (PageId == -1) { + int OldSize = Data.size(); + PageId = OldSize / PAGE_SIZE; + // Allocate the memory + Data.resize(OldSize + PAGE_SIZE); + // Fill the whole capacity with empty elements + for (int I = 0; I < PAGE_SIZE; ++I) { + Data[I + OldSize] = T(); + } + } + // Return the element + return Data[Index % PAGE_SIZE + PAGE_SIZE * PageId]; + } + + // Return the size of the vector + size_t capacity() const { return Lookup.size() * PAGE_SIZE; } + + size_t size() const { return Size; } + + // Expands the vector to the given size. + // If the vector is already bigger, does nothing. + void expand(size_t NewSize) { + // You cannot shrink the vector, otherwise + // you would have to invalidate + assert(NewSize >= Size); + if (NewSize <= Size) { + return; + } + if (NewSize <= capacity()) { + Size = NewSize; + return; + } + auto Pages = NewSize / PAGE_SIZE; + auto Remainder = NewSize % PAGE_SIZE; + if (Remainder) { + Pages += 1; + } + assert(Pages > Lookup.size()); + Lookup.resize(Pages, -1); + Size = NewSize; + } + + // Return true if the vector is empty + bool empty() const { return Size == 0; } + /// Clear the vector + void clear() { + Size = 0; + Lookup.clear(); + Data.clear(); + } + + std::vector<T> const &materialised() const { return Data; } +}; + +#endif // LLVM_ADT_PAGEDVECTOR_H |
I feel like it looks better to touch https://llvm.org/docs/LangRef.html to introduce new ADT and tell what is the benefit (pros and cons) of the new data structure. |
Thank you for your feedback. Could you please be more specific? I cannot find any reference to ADT in the document you pointed me to. |
Sorry. I meant to sent: https://llvm.org/docs/ProgrammersManual.html#picking-the-right-data-structure-for-a-task. Also I feel like the comments are much less than other ADT types. I mean these are important and fundamental data structures. So that almost every LLVM developer need to learn them someday. And it would be pretty helpful to see the explanations. You can see other the implementation of other ADTs for example. |
Thank you for the clarification. I have added the requested documentation and there are now more lines of comment than lines of code. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel better to add unittests to llvm/unittests/ADT.
I feel we need more reviewers for introducing ADT types. Let me try to add some people. |
Any numbers on what / how much this improves? |
@ChuanqiXu9 I have added a simple unit test, which has 100% code coverage as far as I can tell and covers the usecase the container will be used for. @nikic This is in the context of improving the memory used when loading modules, in particular my usecase is the runtime overhead of the ROOT/cling C++ interpreter / I/O subsystem. Without this patch, the initialisation of the system, which on startup loads a bunch of unneeded modules, grows to up 80 MB as you can see from: With this patch applied, the total overhead goes down to 52MB: As you can see the |
@nikic I suspect the improvements will be also visible in any use case where modules are eagerly loaded but only sparsely used. |
@ChuanqiXu9 I think I have addressed all your comments. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Sorry for not commenting everything at the first sight. I don't have further concerns. But I'd like to give more times for other reviewers to take a look.
@ktf, could you update the description of the PR including details about the reasoning of this change and the current benchmarks. @nikic, this work is in the context of root-project/root#13000. TL;DR: The downstream client has a long running session and preloads a relatively large list of modules. The clang::ASTReader does almost all of of the work lazily when a declaration is needed. However, in a few places we pre-allocate large vectors (for types or source locations for the entire module file) just to make sure the sequencing is right. While this patch will not make this lazy, as we should, it goes a long way towards making this a relatively cheap for module content which is sparsely used. One good example where this easily becomes a problem is building a pcm file for boost. |
@vgvassilev done. @ChuanqiXu9 I think I addressed all your additional comments and updated the PR. Please let me know if you have further ones or if you want me to squash the commits into one. |
I think we need to add some test on the clang modules side. I am currently thinking how.. |
Why don't just turn SLocEntry into a POD (if it is not yet already) and use SmallVector::resize_for_overwrite? That way the vector will not be initialized and the OS will only allocate physical memory on the first access to a page. |
Never mind, it isn't "just", the class is more complex than it looks, and there is also QualType. |
@s-barannikov, AFAICT, that would not work because the overwrite is not unconditional, but it will actually check for default initialised content. Basically the current mechanics relies on default initialisation rather than being "uninitialised". What you propose for sure does not work out of the box with LoadedDecls (I just checked). I even thought to have a Besides the above considerations, at least for my use case, I also noticed the page granularity is actually not good enough to guarantee good memory savings (that's why I use |
Co-authored-by: Jakub Kuderski <[email protected]>
Co-authored-by: Jakub Kuderski <[email protected]>
Co-authored-by: Jakub Kuderski <[email protected]>
Co-authored-by: Jakub Kuderski <[email protected]>
Co-authored-by: Jakub Kuderski <[email protected]>
I believe we have beaten this PR to death and would propose to merge it when the builds succeed. |
@ktf, congrats for your first patch to llvm. Well done! |
Thanks! If I were younger I would probably know how to post a dancing banana emoji at this point... ;-) |
Looks like this causes a compile-time regression: https://llvm-compile-time-tracker.com/compare.php?from=abcaebfe3aacb13d46be5e949fd6ed9b4321e2f6&to=4ae51570806ba5c5fcabe6d6dcbe52e3a5d5453b&stat=instructions%3Au About 0.5% at |
It is probably the change in the llvm-project/llvm/include/llvm/ADT/PagedVector.h Lines 82 to 95 in f99bd29
Would adding an |
@vgvassilev ,
would you take care of it? |
These are not available in all build configurations. Originally introuduced in: #66430
@vvereschaka I submitted a fix for death tests in 8580010 |
@kuhar, thanks, you overtook me. Here is my diff: diff --git a/llvm/unittests/ADT/PagedVectorTest.cpp b/llvm/unittests/ADT/PagedVectorTest.cpp
index e1b0c62d3395..c45df6d9e7e8 100644
--- a/llvm/unittests/ADT/PagedVectorTest.cpp
+++ b/llvm/unittests/ADT/PagedVectorTest.cpp
@@ -24,8 +24,8 @@ TEST(PagedVectorTest, EmptyTest) {
EXPECT_EQ(V.materialized_end().getIndex(), 0ULL);
EXPECT_EQ(std::distance(V.materialized_begin(), V.materialized_end()), 0LL);
- EXPECT_DEATH(V[0], "Index < Size");
- EXPECT_DEATH(PagedVector<int>(nullptr), "Allocator cannot be null");
+ EXPECT_DEBUG_DEATH(V[0], "Index < Size");
+ EXPECT_DEBUG_DEATH(PagedVector<int>(nullptr), "Allocator cannot be null");
}
TEST(PagedVectorTest, ExpandTest) {
@@ -69,7 +69,7 @@ TEST(PagedVectorTest, HalfPageFillingTest) {
for (int I = 0; I < 5; ++I)
EXPECT_EQ(V[I], I);
for (int I = 5; I < 10; ++I)
- EXPECT_DEATH(V[I], "Index < Size");
+ EXPECT_DEBUG_DEATH(V[I], "Index < Size");
}
TEST(PagedVectorTest, FillFullMultiPageTest) {
@@ -244,7 +244,7 @@ TEST(PagedVectorTest, ShrinkTest) {
EXPECT_EQ(V.size(), 0ULL);
EXPECT_EQ(V.capacity(), 0ULL);
EXPECT_EQ(std::distance(V.materialized_begin(), V.materialized_end()), 0LL);
- EXPECT_DEATH(V[0], "Index < Size");
+ EXPECT_DEBUG_DEATH(V[0], "Index < Size");
}
TEST(PagedVectorTest, FunctionalityTest) { Could you use |
@vgvassilev I've seen other tests use the pattern from my fix. Feel free to overwrite with your version if that's preferred. |
Also, don't use have to guard that with |
Too late in the night in my end and I closed my laptop already. I am fine with your fix - I find the use of the macro I proposed cleaner though. |
Ideally, I think we could have an llvm-specific macro that combines the guard and the check (say |
@ktf could you take a look at this? |
I also noticed that LoadedSLocEntryTable uses 42 elements tables, which probably introduces some extra divisions to do the indexing. I can try switching to 32 elements. |
Hi @vgvassilev ! |
@mikaelholmen see also #67958. I can't seem to create a version that avoids triggering the warning and is also compatible across both macOS and Windows. Some good idea would be appreciated, especially because I do not have a windows setup and godbolt does not seem to like LLVM as an external package. |
I've tested the following change that forces power of two pages sizes, but it did not have any impact: fa54294 I've done some profiling and found that the regression seems to be related to inlining. In particular getAndAdvanceChar() inside LexTokenInternal() no longer gets inlined, and that's hot code. This only affects builds with GCC. It's not totally obvious to me why that happens, as getAndAdvanceChar() doesn't appear to use PagedVector at all. |
My understanding was that we basically only cared about performance of clang on clang - that we expect people who want an efficient clang to bootstrap? Could |
This is in the context of improving the memory used when loading modules. It addresses in particular the following usecases:
Without this patch, the initialisation of ROOT grows to up 80 MB:
With this patch applied it goes down to 52 MB:
As one can see the
ROOT::Internal::GetROOT2()
initialisation method goes from 67 to 30 MB (with some of the allocations which are now moved later on, due to the lazy allocation of the PagedVector).