Skip to content

Commit

Permalink
Load most of the source manager's information lazily from the PCH
Browse files Browse the repository at this point in the history
file. In particular, only eagerly load source location entries for
files and for the predefines buffer. Other buffers and
macro-instantiation source location entries are loaded lazily.

With the Cocoa-prefixed "Hello, World", we only load 815 of the 26555
source location entries. This halves the amount of user time we spend in
this "Hello, World" program with -fsyntax-only (down to .007s).

This optimization is part 1 of 2 for the source manager. This
eliminates most of the user time in loading a PCH file. We still spend
too much time initializing File structures (especially in the calls to
stat), so we need to either make the loading of source location
entries for files lazy or import the stat cache from the PTH
implementation.

llvm-svn: 70196
  • Loading branch information
DougGregor committed Apr 27, 2009
1 parent 1f55182 commit 258ae54
Show file tree
Hide file tree
Showing 6 changed files with 373 additions and 142 deletions.
65 changes: 53 additions & 12 deletions clang/include/clang/Basic/SourceManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,16 @@ namespace SrcMgr {
}
};
} // end SrcMgr namespace.


/// \brief External source of source location entries.
///
/// Abstract interface. SourceManager::getSLocEntry calls ReadSLocEntry on
/// its registered source for entries that are not yet marked as loaded.
class ExternalSLocEntrySource {
public:
/// Virtual destructor; declared here and defined out of line.
virtual ~ExternalSLocEntrySource();

/// \brief Read the source location entry with index ID.
///
/// \param ID index of the source location entry to read.
virtual void ReadSLocEntry(unsigned ID) = 0;
};

/// SourceManager - This file handles loading and caching of source files into
/// memory. This object owns the MemoryBuffer objects for all of the loaded
/// files and assigns unique FileID's for each unique #include chain.
Expand Down Expand Up @@ -281,7 +290,15 @@ class SourceManager {
/// NextOffset - This is the next available offset that a new SLocEntry can
/// start at. It is SLocEntryTable.back().getOffset()+size of back() entry.
unsigned NextOffset;


/// \brief If source location entries are being lazily loaded from
/// an external source, this vector indicates whether the Ith source
/// location entry has already been loaded from the external storage.
std::vector<bool> SLocEntryLoaded;

/// \brief An external source for source location entries.
ExternalSLocEntrySource *ExternalSLocEntries;

/// LastFileIDLookup - This is a one-entry cache to speed up getFileID.
/// LastFileIDLookup records the last FileID looked up or created, because it
/// is very common to look up many tokens from the same file.
Expand All @@ -308,7 +325,9 @@ class SourceManager {
explicit SourceManager(const SourceManager&);
void operator=(const SourceManager&);
public:
SourceManager() : LineTable(0), NumLinearScans(0), NumBinaryProbes(0) {
SourceManager()
: ExternalSLocEntries(0), LineTable(0), NumLinearScans(0),
NumBinaryProbes(0) {
clearIDTables();
}
~SourceManager();
Expand Down Expand Up @@ -337,19 +356,25 @@ class SourceManager {
/// createFileID - Create a new FileID that represents the specified file
/// being #included from the specified IncludePosition. This returns 0 on
/// error and translates NULL into standard input.
/// PreallocatedID should be non-zero when this operation is filling in a
/// pre-allocated, lazily computed source location entry; it identifies that
/// entry, and Offset then gives the (non-zero) offset to record for it.
FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos,
SrcMgr::CharacteristicKind FileCharacter) {
SrcMgr::CharacteristicKind FileCharacter,
unsigned PreallocatedID = 0,
unsigned Offset = 0) {
const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile);
if (IR == 0) return FileID(); // Error opening file?
return createFileID(IR, IncludePos, FileCharacter);
return createFileID(IR, IncludePos, FileCharacter, PreallocatedID, Offset);
}

/// createFileIDForMemBuffer - Create a new FileID that represents the
/// specified memory buffer. This does no caching of the buffer and takes
/// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer,
unsigned PreallocatedID = 0,
unsigned Offset = 0) {
return createFileID(createMemBufferContentCache(Buffer), SourceLocation(),
SrcMgr::C_User);
SrcMgr::C_User, PreallocatedID, Offset);
}

/// createMainFileIDForMembuffer - Create the FileID for a memory buffer
Expand All @@ -367,7 +392,9 @@ class SourceManager {
SourceLocation createInstantiationLoc(SourceLocation Loc,
SourceLocation InstantiationLocStart,
SourceLocation InstantiationLocEnd,
unsigned TokLength);
unsigned TokLength,
unsigned PreallocatedID = 0,
unsigned Offset = 0);

//===--------------------------------------------------------------------===//
// FileID manipulation methods.
Expand Down Expand Up @@ -411,8 +438,9 @@ class SourceManager {
/// getLocForStartOfFile - Return the source location corresponding to the
/// first byte of the specified file.
SourceLocation getLocForStartOfFile(FileID FID) const {
assert(FID.ID < SLocEntryTable.size() && SLocEntryTable[FID.ID].isFile());
unsigned FileOffset = SLocEntryTable[FID.ID].getOffset();
assert(FID.ID < SLocEntryTable.size() && "FileID out of range");
assert(getSLocEntry(FID).isFile() && "FileID is not a file");
unsigned FileOffset = getSLocEntry(FID).getOffset();
return SourceLocation::getFileLoc(FileOffset);
}

Expand Down Expand Up @@ -616,11 +644,21 @@ class SourceManager {

/// Return the source location entry for \p FID. When an external source is
/// registered and the entry is tracked but not yet marked as loaded, the
/// external source is asked to read it before the table slot is returned.
const SrcMgr::SLocEntry &getSLocEntry(FileID FID) const {
  assert(FID.ID < SLocEntryTable.size() && "Invalid id");

  if (ExternalSLocEntries) {
    // Only indices covered by SLocEntryLoaded can be pending a lazy load.
    const bool Tracked = FID.ID < SLocEntryLoaded.size();
    if (Tracked && !SLocEntryLoaded[FID.ID])
      ExternalSLocEntries->ReadSLocEntry(FID.ID);
  }

  return SLocEntryTable[FID.ID];
}

unsigned getNextOffset() const { return NextOffset; }

/// \brief Preallocate some number of source location entries, which
/// will be loaded as needed from the given external source.
///
/// \param Source the external source that will be asked to read the
/// preallocated entries.
/// \param NumSLocEntries the number of entries to add to the source
/// location entry table.
/// \param NextOffset the value to store as the next available offset for
/// new source location entries.
void PreallocateSLocEntries(ExternalSLocEntrySource *Source,
unsigned NumSLocEntries,
unsigned NextOffset);

private:
/// isOffsetInFileID - Return true if the specified FileID contains the
/// specified SourceLocation offset. This is a very hot method.
Expand All @@ -632,15 +670,18 @@ class SourceManager {
// If this is the last entry then it does. Otherwise, the entry after it
// has to not include it.
if (FID.ID+1 == SLocEntryTable.size()) return true;
return SLocOffset < SLocEntryTable[FID.ID+1].getOffset();

return SLocOffset < getSLocEntry(FileID::get(FID.ID+1)).getOffset();
}

/// createFileID - Create a new fileID for the specified ContentCache and
/// include position. This works regardless of whether the ContentCache
/// corresponds to a file or some other input source.
FileID createFileID(const SrcMgr::ContentCache* File,
SourceLocation IncludePos,
SrcMgr::CharacteristicKind DirCharacter);
SrcMgr::CharacteristicKind DirCharacter,
unsigned PreallocatedID = 0,
unsigned Offset = 0);

const SrcMgr::ContentCache *
getOrCreateContentCache(const FileEntry *SourceFile);
Expand Down
14 changes: 13 additions & 1 deletion clang/include/clang/Frontend/PCHBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,19 @@ namespace clang {

/// \brief The value of the next __COUNTER__ to dispense.
/// [PP_COUNTER_VALUE, Val]
PP_COUNTER_VALUE = 14
PP_COUNTER_VALUE = 14,

/// \brief Record code for the table of offsets into the block
/// of source-location information.
SOURCE_LOCATION_OFFSETS = 15,

/// \brief Record code for the set of source location entries
/// that need to be preloaded by the PCH reader.
///
/// This set contains the source location entry for the
/// predefines buffer and for any file entries that need to be
/// preloaded.
SOURCE_LOCATION_PRELOADS = 16
};

/// \brief Record types used within a source manager block.
Expand Down
22 changes: 21 additions & 1 deletion clang/include/clang/Frontend/PCHReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "clang/AST/Type.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
Expand Down Expand Up @@ -68,7 +69,8 @@ class SwitchCase;
class PCHReader
: public ExternalSemaSource,
public IdentifierInfoLookup,
public ExternalIdentifierLookup {
public ExternalIdentifierLookup,
public ExternalSLocEntrySource {
public:
enum PCHReadResult { Success, Failure, IgnorePCH };

Expand Down Expand Up @@ -102,6 +104,16 @@ class PCHReader
/// this PCH file.
llvm::OwningPtr<llvm::MemoryBuffer> Buffer;

/// \brief Offsets for all of the source location entries in the
/// PCH file.
const uint64_t *SLocOffsets;

/// \brief The number of source location entries in the PCH file.
unsigned TotalNumSLocEntries;

/// \brief Cursor used to read source location entries.
llvm::BitstreamCursor SLocEntryCursor;

/// \brief Offset of each type within the bitstream, indexed by the
/// type ID, or the representation of a Type*.
const uint64_t *TypeOffsets;
Expand Down Expand Up @@ -214,6 +226,10 @@ class PCHReader
/// been de-serialized.
std::multimap<unsigned, AddrLabelExpr *> UnresolvedAddrLabelExprs;

/// \brief The number of source location entries de-serialized from
/// the PCH file.
unsigned NumSLocEntriesRead;

/// \brief The number of statements (and expressions) de-serialized
/// from the PCH file.
unsigned NumStatementsRead;
Expand Down Expand Up @@ -257,6 +273,7 @@ class PCHReader
unsigned PCHPredefLen,
FileID PCHBufferID);
PCHReadResult ReadSourceManagerBlock();
PCHReadResult ReadSLocEntryRecord(unsigned ID);

bool ParseLanguageOptions(const llvm::SmallVectorImpl<uint64_t> &Record);
QualType ReadTypeRecord(uint64_t Offset);
Expand Down Expand Up @@ -380,6 +397,9 @@ class PCHReader
return DecodeIdentifierInfo(ID);
}

/// \brief Read the source location entry with index ID.
virtual void ReadSLocEntry(unsigned ID);

Selector DecodeSelector(unsigned Idx);

Selector GetSelector(const RecordData &Record, unsigned &Idx) {
Expand Down
56 changes: 52 additions & 4 deletions clang/lib/Basic/SourceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,17 @@ SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
return Entry;
}

/// Register \p Source as the external provider of source location entries,
/// record \p NextOffset as the next available offset, and grow the entry
/// table by \p NumSLocEntries slots that are not yet marked as loaded.
void SourceManager::PreallocateSLocEntries(ExternalSLocEntrySource *Source,
                                           unsigned NumSLocEntries,
                                           unsigned NextOffset) {
  ExternalSLocEntries = Source;

  // The parameter shadows the data member of the same name, hence "this->".
  this->NextOffset = NextOffset;

  // One "loaded" flag per preallocated entry plus one for index 0. Index 0
  // is flagged as loaded immediately, so getSLocEntry never requests it from
  // the external source.
  const unsigned NumLoadedFlags = NumSLocEntries + 1;
  SLocEntryLoaded.resize(NumLoadedFlags);
  SLocEntryLoaded.front() = true;

  // Append the placeholder entries that will be filled in on demand.
  SLocEntryTable.resize(SLocEntryTable.size() + NumSLocEntries);
}


//===----------------------------------------------------------------------===//
// Methods to create new FileID's and instantiations.
//===----------------------------------------------------------------------===//
Expand All @@ -317,7 +328,26 @@ SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
/// corresponds to a file or some other input source.
FileID SourceManager::createFileID(const ContentCache *File,
SourceLocation IncludePos,
SrcMgr::CharacteristicKind FileCharacter) {
SrcMgr::CharacteristicKind FileCharacter,
unsigned PreallocatedID,
unsigned Offset) {
SLocEntry NewEntry = SLocEntry::get(NextOffset,
FileInfo::get(IncludePos, File,
FileCharacter));
if (PreallocatedID) {
// If we're filling in a preallocated ID, just load in the file
// entry and return.
assert(PreallocatedID < SLocEntryLoaded.size() &&
"Preallocate ID out-of-range");
assert(!SLocEntryLoaded[PreallocatedID] &&
"Source location entry already loaded");
assert(Offset && "Preallocate source location cannot have zero offset");
SLocEntryTable[PreallocatedID]
= SLocEntry::get(Offset, FileInfo::get(IncludePos, File, FileCharacter));
SLocEntryLoaded[PreallocatedID] = true;
return LastFileIDLookup = FileID::get(PreallocatedID);
}

SLocEntryTable.push_back(SLocEntry::get(NextOffset,
FileInfo::get(IncludePos, File,
FileCharacter)));
Expand All @@ -336,8 +366,22 @@ FileID SourceManager::createFileID(const ContentCache *File,
SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
SourceLocation ILocStart,
SourceLocation ILocEnd,
unsigned TokLength) {
unsigned TokLength,
unsigned PreallocatedID,
unsigned Offset) {
InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
if (PreallocatedID) {
// If we're filling in a preallocated ID, just load in the
// instantiation entry and return.
assert(PreallocatedID < SLocEntryLoaded.size() &&
"Preallocate ID out-of-range");
assert(!SLocEntryLoaded[PreallocatedID] &&
"Source location entry already loaded");
assert(Offset && "Preallocate source location cannot have zero offset");
SLocEntryTable[PreallocatedID] = SLocEntry::get(Offset, II);
SLocEntryLoaded[PreallocatedID] = true;
return SourceLocation::getMacroLoc(Offset);
}
SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
NextOffset += TokLength+1;
Expand Down Expand Up @@ -391,6 +435,8 @@ FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
unsigned NumProbes = 0;
while (1) {
--I;
if (ExternalSLocEntries)
getSLocEntry(FileID::get(I - SLocEntryTable.begin()));
if (I->getOffset() <= SLocOffset) {
#if 0
printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
Expand All @@ -399,7 +445,7 @@ FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
#endif
FileID Res = FileID::get(I-SLocEntryTable.begin());

// If this isn't an instantiation, remember it. We have good locality
// across FileID lookups.
if (!I->isInstantiation())
Expand All @@ -421,7 +467,7 @@ FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
NumProbes = 0;
while (1) {
unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
unsigned MidOffset = getSLocEntry(FileID::get(MiddleIndex)).getOffset();

++NumProbes;

Expand Down Expand Up @@ -865,3 +911,5 @@ void SourceManager::PrintStats() const {
llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
<< NumBinaryProbes << " binary.\n";
}

// Out-of-line definition of the virtual destructor that the class body only
// declares; it has nothing to release.
ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
Loading

0 comments on commit 258ae54

Please sign in to comment.