Skip to content

Commit

Permalink
internal/lsp/cache: optimize Snapshot.clone
Browse files Browse the repository at this point in the history
This change replaces the single large map used for snapshot.goFiles
by a map of 256 stripes, each of which becomes immutable once shared.
This optimizes the common case in which the copy is nearly identical
to the original.

We still need to visit each map entry to see whether it needs to be
deleted (which is rare) and to inherit the handle in the usual case.
This is now done concurrently.

Also, share the (logically immutable) []PackageIDs slices across
old and new snapshots. This was worth 5% of CPU and 1/3 of allocations
(all small).

Benchmark on darwin/arm64 shows a 29% reduction for DidChange.

$ go test -v ./gopls/internal/regtest/bench -run=TestBenchmarkDidChange -didchange_dir=$HOME/w/kubernetes -didchange_file=pkg/util/hash/hash.go

Before:
BenchmarkStatistics	     100	  22955469 ns/op	11308095 B/op	   47412 allocs/op
BenchmarkStatistics	     100	  23454630 ns/op	11226742 B/op	   46882 allocs/op
BenchmarkStatistics	     100	  23618532 ns/op	11258619 B/op	   47068 allocs/op

After goFilesMap:
BenchmarkStatistics	     100	  16643972 ns/op	 8770787 B/op	   46238 allocs/op
BenchmarkStatistics	     100	  17805864 ns/op	 8862926 B/op	   46762 allocs/op
BenchmarkStatistics	     100	  18618255 ns/op	 9308864 B/op	   49776 allocs/op

After goFilesMap and ids sharing:
BenchmarkStatistics          100          16703623 ns/op         8772626 B/op      33812 allocs/op
BenchmarkStatistics          100          16927378 ns/op         8529491 B/op      32328 allocs/op
BenchmarkStatistics          100          16632762 ns/op         8557533 B/op      32497 allocs/op

Also:
- Add comments documenting findings of profiling.
- preallocate slice for knownSubdirs.
- remove unwanted loop over slice in Generation.Inherit

Updates golang/go#45686

Change-Id: Id953699191b8404cf36ba3a7ab9cd78b1d19c0a2
Reviewed-on: https://go-review.googlesource.com/c/tools/+/410176
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Robert Findley <[email protected]>
gopls-CI: kokoro <[email protected]>
Run-TryBot: Alan Donovan <[email protected]>
  • Loading branch information
adonovan committed Jun 10, 2022
1 parent 697795d commit 9651276
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 37 deletions.
8 changes: 7 additions & 1 deletion internal/lsp/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,14 @@ func (h *fileHandle) Read() ([]byte, error) {
return h.bytes, h.err
}

// hashContents returns a string of hex digits denoting the hash of contents.
//
// TODO(adonovan): opt: use [32]byte array as a value more widely and convert
// to hex digits on demand (rare). The array is larger when it appears as a
// struct field (32B vs 16B) but smaller overall (string data is 64B), has
// better locality, and is more efficiently hashed by runtime maps.
func hashContents(contents []byte) string {
return fmt.Sprintf("%x", sha256.Sum256(contents))
return fmt.Sprintf("%64x", sha256.Sum256(contents))
}

var cacheIndex, sessionIndex, viewIndex int64
Expand Down
2 changes: 1 addition & 1 deletion internal/lsp/cache/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ func (s *Session) createView(ctx context.Context, name string, folder span.URI,
packages: make(map[packageKey]*packageHandle),
meta: NewMetadataGraph(),
files: make(map[span.URI]source.VersionedFileHandle),
goFiles: make(map[parseKey]*parseGoHandle),
goFiles: newGoFileMap(),
symbols: make(map[span.URI]*symbolHandle),
actions: make(map[actionKey]*actionHandle),
workspacePackages: make(map[PackageID]PackagePath),
Expand Down
184 changes: 162 additions & 22 deletions internal/lsp/cache/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ type snapshot struct {
files map[span.URI]source.VersionedFileHandle

// goFiles maps a parseKey to its parseGoHandle.
goFiles map[parseKey]*parseGoHandle
goFiles *goFileMap

// TODO(rfindley): consider merging this with files to reduce burden on clone.
symbols map[span.URI]*symbolHandle
Expand Down Expand Up @@ -663,16 +663,17 @@ func (s *snapshot) transitiveReverseDependencies(id PackageID, ids map[PackageID
func (s *snapshot) getGoFile(key parseKey) *parseGoHandle {
s.mu.Lock()
defer s.mu.Unlock()
return s.goFiles[key]
return s.goFiles.get(key)
}

func (s *snapshot) addGoFile(key parseKey, pgh *parseGoHandle) *parseGoHandle {
s.mu.Lock()
defer s.mu.Unlock()
if existing, ok := s.goFiles[key]; ok {
return existing

if prev := s.goFiles.get(key); prev != nil {
return prev
}
s.goFiles[key] = pgh
s.goFiles.set(key, pgh)
return pgh
}

Expand Down Expand Up @@ -811,6 +812,8 @@ func (s *snapshot) fileWatchingGlobPatterns(ctx context.Context) map[string]stru
patterns := map[string]struct{}{
fmt.Sprintf("**/*.{%s}", extensions): {},
}

// Add a pattern for each Go module in the workspace that is not within the view.
dirs := s.workspace.dirs(ctx, s)
for _, dir := range dirs {
dirName := dir.Filename()
Expand All @@ -830,14 +833,19 @@ func (s *snapshot) fileWatchingGlobPatterns(ctx context.Context) map[string]stru
// contain Go code (golang/go#42348). To handle this, explicitly watch all
// of the directories in the workspace. We find them by adding the
// directories of every file in the snapshot's workspace directories.
var dirNames []string
for _, uri := range s.getKnownSubdirs(dirs) {
dirNames = append(dirNames, uri.Filename())
}
sort.Strings(dirNames)
if len(dirNames) > 0 {
// There may be thousands.
knownSubdirs := s.getKnownSubdirs(dirs)
if n := len(knownSubdirs); n > 0 {
dirNames := make([]string, 0, n)
for _, uri := range knownSubdirs {
dirNames = append(dirNames, uri.Filename())
}
sort.Strings(dirNames)
// The double allocation of Sprintf(Join()) accounts for 8%
// of DidChange, but specializing doesn't appear to help. :(
patterns[fmt.Sprintf("{%s}", strings.Join(dirNames, ","))] = struct{}{}
}

return patterns
}

Expand Down Expand Up @@ -874,7 +882,7 @@ func (s *snapshot) getKnownSubdirs(wsDirs []span.URI) []span.URI {
}
s.unprocessedSubdirChanges = nil

var result []span.URI
result := make([]span.URI, 0, len(s.knownSubdirs))
for uri := range s.knownSubdirs {
result = append(result, uri)
}
Expand Down Expand Up @@ -1719,7 +1727,7 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC
packages: make(map[packageKey]*packageHandle, len(s.packages)),
actions: make(map[actionKey]*actionHandle, len(s.actions)),
files: make(map[span.URI]source.VersionedFileHandle, len(s.files)),
goFiles: make(map[parseKey]*parseGoHandle, len(s.goFiles)),
goFiles: s.goFiles.clone(),
symbols: make(map[span.URI]*symbolHandle, len(s.symbols)),
workspacePackages: make(map[PackageID]PackagePath, len(s.workspacePackages)),
unloadableFiles: make(map[span.URI]struct{}, len(s.unloadableFiles)),
Expand Down Expand Up @@ -1764,12 +1772,27 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC
result.parseWorkHandles[k] = v
}

for k, v := range s.goFiles {
if _, ok := changes[k.file.URI]; ok {
continue
// Copy the handles of all Go source files.
// There may be tens of thousands of files,
// but changes are typically few, so we
// use a striped map optimized for this case
// and visit its stripes in parallel.
var (
toDeleteMu sync.Mutex
toDelete []parseKey
)
s.goFiles.forEachConcurrent(func(k parseKey, v *parseGoHandle) {
if changes[k.file.URI] == nil {
// no change (common case)
newGen.Inherit(v.handle)
} else {
toDeleteMu.Lock()
toDelete = append(toDelete, k)
toDeleteMu.Unlock()
}
newGen.Inherit(v.handle)
result.goFiles[k] = v
})
for _, k := range toDelete {
result.goFiles.delete(k)
}

// Copy all of the go.mod-related handles. They may be invalidated later,
Expand Down Expand Up @@ -1975,21 +1998,34 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC
deleteInvalidMetadata := forceReloadMetadata || workspaceModeChanged
idsInSnapshot := map[PackageID]bool{} // track all known IDs
for uri, ids := range s.meta.ids {
var resultIDs []PackageID
for _, id := range ids {
// Optimization: ids slices are typically numerous, short (<3),
// and rarely modified by this loop, so don't allocate copies
// until necessary.
var resultIDs []PackageID // nil implies equal to ids[:i:i]
for i, id := range ids {
if skipID[id] || deleteInvalidMetadata && idsToInvalidate[id] {
resultIDs = ids[:i:i] // unshare
continue
}
// The ID is not reachable from any workspace package, so it should
// be deleted.
if !reachableID[id] {
resultIDs = ids[:i:i] // unshare
continue
}
idsInSnapshot[id] = true
resultIDs = append(resultIDs, id)
if resultIDs != nil {
resultIDs = append(resultIDs, id)
}
}
if resultIDs == nil {
resultIDs = ids
}
result.meta.ids[uri] = resultIDs
}
// TODO(adonovan): opt: represent PackageID as an index into a process-global
// dup-free list of all package names ever seen, then use a bitmap instead of
// a hash table for "PackageSet" (e.g. idsInSnapshot).

// Copy the package metadata. We only need to invalidate packages directly
// containing the affected file, and only if it changed in a relevant way.
Expand Down Expand Up @@ -2259,7 +2295,7 @@ func metadataChanges(ctx context.Context, lockedSnapshot *snapshot, oldFH, newFH
// lockedSnapshot must be locked.
func peekOrParse(ctx context.Context, lockedSnapshot *snapshot, fh source.FileHandle, mode source.ParseMode) (*source.ParsedGoFile, error) {
key := parseKey{file: fh.FileIdentity(), mode: mode}
if pgh := lockedSnapshot.goFiles[key]; pgh != nil {
if pgh := lockedSnapshot.goFiles.get(key); pgh != nil {
cached := pgh.handle.Cached(lockedSnapshot.generation)
if cached != nil {
cached := cached.(*parseGoData)
Expand Down Expand Up @@ -2547,3 +2583,107 @@ func readGoSum(dst map[module.Version][]string, file string, data []byte) error
}
return nil
}

// -- goFileMap --

// A goFileMap is conceptually a map[parseKey]*parseGoHandle,
// optimized for cloning all or nearly all entries.
type goFileMap struct {
// The map is represented as a map of 256 stripes, one per
// distinct value of the top 8 bits of key.file.Hash.
// Each stripe has an associated boolean indicating whether it
// is shared, and thus immutable, and thus must be copied before any update.
// (The bits could be packed but it hasn't been worth it yet.)
stripes [256]map[parseKey]*parseGoHandle
exclusive [256]bool // exclusive[i] means stripe[i] is not shared and may be safely mutated
}

// newGoFileMap returns a new empty goFileMap.
func newGoFileMap() *goFileMap {
return new(goFileMap) // all stripes are shared (non-exclusive) nil maps
}

// clone returns a copy of m.
// For concurrency, it counts as an update to m.
func (m *goFileMap) clone() *goFileMap {
m.exclusive = [256]bool{} // original and copy are now nonexclusive
copy := *m
return &copy
}

// get returns the value for key k.
func (m *goFileMap) get(k parseKey) *parseGoHandle {
return m.stripes[m.hash(k)][k]
}

// set updates the value for key k to v.
func (m *goFileMap) set(k parseKey, v *parseGoHandle) {
m.unshare(k)[k] = v
}

// delete deletes the value for key k, if any.
func (m *goFileMap) delete(k parseKey) {
// TODO(adonovan): opt?: skip unshare if k isn't present.
delete(m.unshare(k), k)
}

// forEachConcurrent calls f for each entry in the map.
// Calls may be concurrent.
// f must not modify m.
func (m *goFileMap) forEachConcurrent(f func(parseKey, *parseGoHandle)) {
// Visit stripes in parallel chunks.
const p = 16 // concurrency level
var wg sync.WaitGroup
wg.Add(p)
for i := 0; i < p; i++ {
chunk := m.stripes[i*p : (i+1)*p]
go func() {
for _, stripe := range chunk {
for k, v := range stripe {
f(k, v)
}
}
wg.Done()
}()
}
wg.Wait()
}

// -- internal--

// hash returns 8 bits from the key's file digest.
func (m *goFileMap) hash(k parseKey) int {
h := k.file.Hash
if h == "" {
// Sadly the Hash isn't always a hash because cache.GetFile may
// successfully return a *fileHandle containing an error and no hash.
// Lump the duds together for now.
// TODO(adonovan): fix the underlying bug.
return 0
}
return unhex(h[0])<<4 | unhex(h[1])
}

// unhex returns the value of a valid hex digit.
func unhex(b byte) int {
if '0' <= b && b <= '9' {
return int(b - '0')
}
return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
}

// unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {
i := m.hash(k)
if !m.exclusive[i] {
m.exclusive[i] = true

// Copy the map.
copy := make(map[parseKey]*parseGoHandle, len(m.stripes[i]))
for k, v := range m.stripes[i] {
copy[k] = v
}
m.stripes[i] = copy
}
return m.stripes[i]
}
2 changes: 1 addition & 1 deletion internal/lsp/source/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ type FileHandle interface {
type FileIdentity struct {
URI span.URI

// Identifier represents a unique identifier for the file's content.
// Hash is a string of hex digits denoting the cryptographic digest of the file's content.
Hash string
}

Expand Down
21 changes: 10 additions & 11 deletions internal/memoize/memoize.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,19 +234,18 @@ func (s *Store) DebugOnlyIterate(f func(k, v interface{})) {
}
}

func (g *Generation) Inherit(hs ...*Handle) {
for _, h := range hs {
if atomic.LoadUint32(&g.destroyed) != 0 {
panic("inherit on generation " + g.name + " destroyed by " + g.destroyedBy)
}
// Inherit makes h valid in generation g. It is concurrency-safe.
func (g *Generation) Inherit(h *Handle) {
if atomic.LoadUint32(&g.destroyed) != 0 {
panic("inherit on generation " + g.name + " destroyed by " + g.destroyedBy)
}

h.mu.Lock()
if h.state == stateDestroyed {
panic(fmt.Sprintf("inheriting destroyed handle %#v (type %T) into generation %v", h.key, h.key, g.name))
}
h.generations[g] = struct{}{}
h.mu.Unlock()
h.mu.Lock()
if h.state == stateDestroyed {
panic(fmt.Sprintf("inheriting destroyed handle %#v (type %T) into generation %v", h.key, h.key, g.name))
}
h.generations[g] = struct{}{}
h.mu.Unlock()
}

// Cached returns the value associated with a handle.
Expand Down
3 changes: 2 additions & 1 deletion internal/memoize/memoize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ func TestCleanup(t *testing.T) {
expectGet(t, h1, g1, &v1)
expectGet(t, h2, g1, &v2)
g2 := s.Generation("g2")
g2.Inherit(h1, h2)
g2.Inherit(h1)
g2.Inherit(h2)

g1.Destroy("TestCleanup")
expectGet(t, h1, g2, &v1)
Expand Down
8 changes: 8 additions & 0 deletions internal/span/uri.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ func (uri URI) Filename() string {
}

func filename(uri URI) (string, error) {
// This function is frequently called and its cost is
// dominated by the allocation of a net.URL.
// TODO(adonovan): opt: replace by a bespoke parseFileURI
// function that doesn't allocate.
if uri == "" {
return "", nil
}
Expand Down Expand Up @@ -80,6 +84,10 @@ func URIFromURI(s string) URI {
return URI(u.String())
}

// CompareURI performs a three-valued comparison of two URIs.
// Lexically unequal URIs may compare equal if they are "file:" URIs
// that share the same base name (ignoring case) and denote the same
// file device/inode, according to stat(2).
func CompareURI(a, b URI) int {
if equalURI(a, b) {
return 0
Expand Down

0 comments on commit 9651276

Please sign in to comment.