Skip to content

Commit

Permalink
gopls/internal/lsp/source/typerefs: purge func bodies before parsing
Browse files Browse the repository at this point in the history
This change reuses the purgeFuncBodies optimization from
completion (now moved into a new package) to reduce the amount
of work and allocation done by the parser in typerefs.

Now that purge is a unit, it has a unit test.

Also, minor tweaks:
- strength-reduce localImports value to a PackageID
- opt: avoid redundant map lookups
- opt: put common ast.Node switch cases first.

Benchmark time is reduced by about a third.

Change-Id: I397a09d6364914df48fd6caa78767dc266dad862
Reviewed-on: https://go-review.googlesource.com/c/tools/+/480917
Run-TryBot: Alan Donovan <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Auto-Submit: Alan Donovan <[email protected]>
gopls-CI: kokoro <[email protected]>
Reviewed-by: Robert Findley <[email protected]>
  • Loading branch information
findleyr authored and adonovan committed Mar 31, 2023
1 parent 58c9a63 commit 902fdca
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 98 deletions.
74 changes: 74 additions & 0 deletions gopls/internal/astutil/purge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package astutil provides various AST utility functions for gopls.
package astutil

import (
"bytes"
"go/scanner"
"go/token"

"golang.org/x/tools/gopls/internal/lsp/safetoken"
)

// PurgeFuncBodies returns a copy of src in which the contents of each
// outermost {...} region except struct and interface types have been
// deleted. This reduces the amount of work required to parse the
// top-level declarations.
//
// PurgeFuncBodies does not preserve newlines or position information.
// Also, if the input is invalid, parsing the output of
// PurgeFuncBodies may result in a different tree due to its effects
// on parser error recovery.
func PurgeFuncBodies(src []byte) []byte {
// Destroy the content of any {...}-bracketed regions that are
// not immediately preceded by a "struct" or "interface"
// token. That includes function bodies, composite literals,
// switch/select bodies, and all blocks of statements.
// This will lead to non-void functions that don't have return
// statements, which of course is a type error, but that's ok.

var out bytes.Buffer
file := token.NewFileSet().AddFile("", -1, len(src))
var sc scanner.Scanner
sc.Init(file, src, nil, 0)
var prev token.Token
var cursor int // last consumed src offset
var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type
for {
pos, tok, _ := sc.Scan()
if tok == token.EOF {
break
}
switch tok {
case token.COMMENT:
// TODO(adonovan): opt: skip, to save an estimated 20% of time.

case token.LBRACE:
if prev == token.STRUCT || prev == token.INTERFACE {
pos = -1
}
braces = append(braces, pos)

case token.RBRACE:
if last := len(braces) - 1; last >= 0 {
top := braces[last]
braces = braces[:last]
if top < 0 {
// struct/interface type: leave alone
} else if len(braces) == 0 { // toplevel only
// Delete {...} body.
start, _ := safetoken.Offset(file, top)
end, _ := safetoken.Offset(file, pos)
out.Write(src[cursor : start+len("{")])
cursor = end
}
}
}
prev = tok
}
out.Write(src[cursor:])
return out.Bytes()
}
86 changes: 86 additions & 0 deletions gopls/internal/astutil/purge_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package astutil_test

import (
"go/ast"
"go/parser"
"go/token"
"os"
"reflect"
"testing"

"golang.org/x/tools/go/packages"
"golang.org/x/tools/gopls/internal/astutil"
)

// TestPurgeFuncBodies tests PurgeFuncBodies by comparing it against a
// (less efficient) reference implementation that purges after parsing.
func TestPurgeFuncBodies(t *testing.T) {
// Load a few standard packages.
config := packages.Config{Mode: packages.NeedCompiledGoFiles}
pkgs, err := packages.Load(&config, "encoding/...")
if err != nil {
t.Fatal(err)
}

// preorder returns the nodes of tree f in preorder.
preorder := func(f *ast.File) (nodes []ast.Node) {
ast.Inspect(f, func(n ast.Node) bool {
if n != nil {
nodes = append(nodes, n)
}
return true
})
return nodes
}

packages.Visit(pkgs, nil, func(p *packages.Package) {
for _, filename := range p.CompiledGoFiles {
content, err := os.ReadFile(filename)
if err != nil {
t.Fatal(err)
}

fset := token.NewFileSet()

// Parse then purge (reference implementation).
f1, _ := parser.ParseFile(fset, filename, content, 0)
ast.Inspect(f1, func(n ast.Node) bool {
switch n := n.(type) {
case *ast.FuncDecl:
if n.Body != nil {
n.Body.List = nil
}
case *ast.FuncLit:
n.Body.List = nil
case *ast.CompositeLit:
n.Elts = nil
}
return true
})

// Purge before parse (logic under test).
f2, _ := parser.ParseFile(fset, filename, astutil.PurgeFuncBodies(content), 0)

// Compare sequence of node types.
nodes1 := preorder(f1)
nodes2 := preorder(f2)
if len(nodes2) < len(nodes1) {
t.Errorf("purged file has fewer nodes: %d vs %d",
len(nodes2), len(nodes1))
nodes1 = nodes1[:len(nodes2)] // truncate
}
for i := range nodes1 {
x, y := nodes1[i], nodes2[i]
if reflect.TypeOf(x) != reflect.TypeOf(y) {
t.Errorf("%s: got %T, want %T",
fset.Position(x.Pos()), y, x)
break
}
}
}
})
}
59 changes: 2 additions & 57 deletions gopls/internal/lsp/source/completion/completion.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
package completion

import (
"bytes"
"context"
"fmt"
"go/ast"
Expand All @@ -27,6 +26,7 @@ import (

"golang.org/x/sync/errgroup"
"golang.org/x/tools/go/ast/astutil"
goplsastutil "golang.org/x/tools/gopls/internal/astutil"
"golang.org/x/tools/gopls/internal/lsp/protocol"
"golang.org/x/tools/gopls/internal/lsp/safetoken"
"golang.org/x/tools/gopls/internal/lsp/snippet"
Expand Down Expand Up @@ -3176,7 +3176,7 @@ func isSlice(obj types.Object) bool {
// quick partial parse. fn is non-nil only for function declarations.
// The AST position information is garbage.
func forEachPackageMember(content []byte, f func(tok token.Token, id *ast.Ident, fn *ast.FuncDecl)) {
purged := purgeFuncBodies(content)
purged := goplsastutil.PurgeFuncBodies(content)
file, _ := parser.ParseFile(token.NewFileSet(), "", purged, 0)
for _, decl := range file.Decls {
switch decl := decl.(type) {
Expand All @@ -3198,58 +3198,3 @@ func forEachPackageMember(content []byte, f func(tok token.Token, id *ast.Ident,
}
}
}

// purgeFuncBodies returns a copy of src in which the contents of each
// outermost {...} region except struct and interface types have been
// deleted. It does not preserve newlines. This reduces the amount of
// work required to parse the top-level declarations.
func purgeFuncBodies(src []byte) []byte {
// Destroy the content of any {...}-bracketed regions that are
// not immediately preceded by a "struct" or "interface"
// token. That includes function bodies, composite literals,
// switch/select bodies, and all blocks of statements.
// This will lead to non-void functions that don't have return
// statements, which of course is a type error, but that's ok.

var out bytes.Buffer
file := token.NewFileSet().AddFile("", -1, len(src))
var sc scanner.Scanner
sc.Init(file, src, nil, 0)
var prev token.Token
var cursor int // last consumed src offset
var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type
for {
pos, tok, _ := sc.Scan()
if tok == token.EOF {
break
}
switch tok {
case token.COMMENT:
// TODO(adonovan): opt: skip, to save an estimated 20% of time.

case token.LBRACE:
if prev == token.STRUCT || prev == token.INTERFACE {
pos = -1
}
braces = append(braces, pos)

case token.RBRACE:
if last := len(braces) - 1; last >= 0 {
top := braces[last]
braces = braces[:last]
if top < 0 {
// struct/interface type: leave alone
} else if len(braces) == 0 { // toplevel only
// Delete {...} body.
start, _ := safetoken.Offset(file, top)
end, _ := safetoken.Offset(file, pos)
out.Write(src[cursor : start+len("{")])
cursor = end
}
}
}
prev = tok
}
out.Write(src[cursor:])
return out.Bytes()
}
3 changes: 3 additions & 0 deletions gopls/internal/lsp/source/typerefs/pkgrefs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"golang.org/x/tools/go/gcexportdata"
"golang.org/x/tools/go/packages"
"golang.org/x/tools/gopls/internal/astutil"
"golang.org/x/tools/gopls/internal/lsp/cache"
"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/gopls/internal/lsp/source/typerefs"
Expand Down Expand Up @@ -286,10 +287,12 @@ func newParser() *memoizedParser {

func (p *memoizedParser) parse(ctx context.Context, uri span.URI) (*ParsedGoFile, error) {
doParse := func(ctx context.Context, uri span.URI) (*ParsedGoFile, error) {
// TODO(adonovan): hoist this operation outside the benchmark critsec.
content, err := os.ReadFile(uri.Filename())
if err != nil {
return nil, err
}
content = astutil.PurgeFuncBodies(content)
pgf, _ := cache.ParseGoSrc(ctx, token.NewFileSet(), uri, content, source.ParseFull)
return pgf, nil
}
Expand Down
Loading

0 comments on commit 902fdca

Please sign in to comment.