diff --git a/gopls/internal/astutil/purge.go b/gopls/internal/astutil/purge.go new file mode 100644 index 00000000000..cec428842a2 --- /dev/null +++ b/gopls/internal/astutil/purge.go @@ -0,0 +1,74 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package astutil provides various AST utility functions for gopls. +package astutil + +import ( + "bytes" + "go/scanner" + "go/token" + + "golang.org/x/tools/gopls/internal/lsp/safetoken" +) + +// PurgeFuncBodies returns a copy of src in which the contents of each +// outermost {...} region except struct and interface types have been +// deleted. This reduces the amount of work required to parse the +// top-level declarations. +// +// PurgeFuncBodies does not preserve newlines or position information. +// Also, if the input is invalid, parsing the output of +// PurgeFuncBodies may result in a different tree due to its effects +// on parser error recovery. +func PurgeFuncBodies(src []byte) []byte { + // Destroy the content of any {...}-bracketed regions that are + // not immediately preceded by a "struct" or "interface" + // token. That includes function bodies, composite literals, + // switch/select bodies, and all blocks of statements. + // This will lead to non-void functions that don't have return + // statements, which of course is a type error, but that's ok. + + var out bytes.Buffer + file := token.NewFileSet().AddFile("", -1, len(src)) + var sc scanner.Scanner + sc.Init(file, src, nil, 0) + var prev token.Token + var cursor int // last consumed src offset + var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type + for { + pos, tok, _ := sc.Scan() + if tok == token.EOF { + break + } + switch tok { + case token.COMMENT: + // TODO(adonovan): opt: skip, to save an estimated 20% of time. + + case token.LBRACE: + if prev == token.STRUCT || prev == token.INTERFACE { + pos = -1 + } + braces = append(braces, pos) + + case token.RBRACE: + if last := len(braces) - 1; last >= 0 { + top := braces[last] + braces = braces[:last] + if top < 0 { + // struct/interface type: leave alone + } else if len(braces) == 0 { // toplevel only + // Delete {...} body. + start, _ := safetoken.Offset(file, top) + end, _ := safetoken.Offset(file, pos) + out.Write(src[cursor : start+len("{")]) + cursor = end + } + } + } + prev = tok + } + out.Write(src[cursor:]) + return out.Bytes() +} diff --git a/gopls/internal/astutil/purge_test.go b/gopls/internal/astutil/purge_test.go new file mode 100644 index 00000000000..52d6cc2b256 --- /dev/null +++ b/gopls/internal/astutil/purge_test.go @@ -0,0 +1,86 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package astutil_test + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "reflect" + "testing" + + "golang.org/x/tools/go/packages" + "golang.org/x/tools/gopls/internal/astutil" +) + +// TestPurgeFuncBodies tests PurgeFuncBodies by comparing it against a +// (less efficient) reference implementation that purges after parsing. +func TestPurgeFuncBodies(t *testing.T) { + // Load a few standard packages. + config := packages.Config{Mode: packages.NeedCompiledGoFiles} + pkgs, err := packages.Load(&config, "encoding/...") + if err != nil { + t.Fatal(err) + } + + // preorder returns the nodes of tree f in preorder. + preorder := func(f *ast.File) (nodes []ast.Node) { + ast.Inspect(f, func(n ast.Node) bool { + if n != nil { + nodes = append(nodes, n) + } + return true + }) + return nodes + } + + packages.Visit(pkgs, nil, func(p *packages.Package) { + for _, filename := range p.CompiledGoFiles { + content, err := os.ReadFile(filename) + if err != nil { + t.Fatal(err) + } + + fset := token.NewFileSet() + + // Parse then purge (reference implementation). + f1, _ := parser.ParseFile(fset, filename, content, 0) + ast.Inspect(f1, func(n ast.Node) bool { + switch n := n.(type) { + case *ast.FuncDecl: + if n.Body != nil { + n.Body.List = nil + } + case *ast.FuncLit: + n.Body.List = nil + case *ast.CompositeLit: + n.Elts = nil + } + return true + }) + + // Purge before parse (logic under test). + f2, _ := parser.ParseFile(fset, filename, astutil.PurgeFuncBodies(content), 0) + + // Compare sequence of node types. + nodes1 := preorder(f1) + nodes2 := preorder(f2) + if len(nodes2) < len(nodes1) { + t.Errorf("purged file has fewer nodes: %d vs %d", + len(nodes2), len(nodes1)) + nodes1 = nodes1[:len(nodes2)] // truncate + } + for i := range nodes1 { + x, y := nodes1[i], nodes2[i] + if reflect.TypeOf(x) != reflect.TypeOf(y) { + t.Errorf("%s: got %T, want %T", + fset.Position(x.Pos()), y, x) + break + } + } + } + }) +} diff --git a/gopls/internal/lsp/source/completion/completion.go b/gopls/internal/lsp/source/completion/completion.go index 767ac8e0343..c11b02454e5 100644 --- a/gopls/internal/lsp/source/completion/completion.go +++ b/gopls/internal/lsp/source/completion/completion.go @@ -7,7 +7,6 @@ package completion import ( - "bytes" "context" "fmt" "go/ast" @@ -27,6 +26,7 @@ import ( "golang.org/x/sync/errgroup" "golang.org/x/tools/go/ast/astutil" + goplsastutil "golang.org/x/tools/gopls/internal/astutil" "golang.org/x/tools/gopls/internal/lsp/protocol" "golang.org/x/tools/gopls/internal/lsp/safetoken" "golang.org/x/tools/gopls/internal/lsp/snippet" @@ -3176,7 +3176,7 @@ func isSlice(obj types.Object) bool { // quick partial parse. fn is non-nil only for function declarations. // The AST position information is garbage. func forEachPackageMember(content []byte, f func(tok token.Token, id *ast.Ident, fn *ast.FuncDecl)) { - purged := purgeFuncBodies(content) + purged := goplsastutil.PurgeFuncBodies(content) file, _ := parser.ParseFile(token.NewFileSet(), "", purged, 0) for _, decl := range file.Decls { switch decl := decl.(type) { @@ -3198,58 +3198,3 @@ func forEachPackageMember(content []byte, f func(tok token.Token, id *ast.Ident, } } } - -// purgeFuncBodies returns a copy of src in which the contents of each -// outermost {...} region except struct and interface types have been -// deleted. It does not preserve newlines. This reduces the amount of -// work required to parse the top-level declarations. -func purgeFuncBodies(src []byte) []byte { - // Destroy the content of any {...}-bracketed regions that are - // not immediately preceded by a "struct" or "interface" - // token. That includes function bodies, composite literals, - // switch/select bodies, and all blocks of statements. - // This will lead to non-void functions that don't have return - // statements, which of course is a type error, but that's ok. - - var out bytes.Buffer - file := token.NewFileSet().AddFile("", -1, len(src)) - var sc scanner.Scanner - sc.Init(file, src, nil, 0) - var prev token.Token - var cursor int // last consumed src offset - var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type - for { - pos, tok, _ := sc.Scan() - if tok == token.EOF { - break - } - switch tok { - case token.COMMENT: - // TODO(adonovan): opt: skip, to save an estimated 20% of time. - - case token.LBRACE: - if prev == token.STRUCT || prev == token.INTERFACE { - pos = -1 - } - braces = append(braces, pos) - - case token.RBRACE: - if last := len(braces) - 1; last >= 0 { - top := braces[last] - braces = braces[:last] - if top < 0 { - // struct/interface type: leave alone - } else if len(braces) == 0 { // toplevel only - // Delete {...} body. - start, _ := safetoken.Offset(file, top) - end, _ := safetoken.Offset(file, pos) - out.Write(src[cursor : start+len("{")]) - cursor = end - } - } - } - prev = tok - } - out.Write(src[cursor:]) - return out.Bytes() -} diff --git a/gopls/internal/lsp/source/typerefs/pkgrefs_test.go b/gopls/internal/lsp/source/typerefs/pkgrefs_test.go index 9c586f8195c..a78bab7dcf4 100644 --- a/gopls/internal/lsp/source/typerefs/pkgrefs_test.go +++ b/gopls/internal/lsp/source/typerefs/pkgrefs_test.go @@ -20,6 +20,7 @@ import ( "golang.org/x/tools/go/gcexportdata" "golang.org/x/tools/go/packages" + "golang.org/x/tools/gopls/internal/astutil" "golang.org/x/tools/gopls/internal/lsp/cache" "golang.org/x/tools/gopls/internal/lsp/source" "golang.org/x/tools/gopls/internal/lsp/source/typerefs" @@ -286,10 +287,12 @@ func newParser() *memoizedParser { func (p *memoizedParser) parse(ctx context.Context, uri span.URI) (*ParsedGoFile, error) { doParse := func(ctx context.Context, uri span.URI) (*ParsedGoFile, error) { + // TODO(adonovan): hoist this operation outside the benchmark critsec. content, err := os.ReadFile(uri.Filename()) if err != nil { return nil, err } + content = astutil.PurgeFuncBodies(content) pgf, _ := cache.ParseGoSrc(ctx, token.NewFileSet(), uri, content, source.ParseFull) return pgf, nil } diff --git a/gopls/internal/lsp/source/typerefs/refs.go b/gopls/internal/lsp/source/typerefs/refs.go index 9f7c835c1ba..000a2f4a34c 100644 --- a/gopls/internal/lsp/source/typerefs/refs.go +++ b/gopls/internal/lsp/source/typerefs/refs.go @@ -46,14 +46,16 @@ func Refs(pgfs []*source.ParsedGoFile, id source.PackageID, imports map[source.I // slice because multiple packages may be referenced by a given name in the // presence of type errors (or multiple dot imports, which are keyed by // "."). - localImports = make(map[*ast.File]map[string][]*source.Metadata) + localImports = make(map[*ast.File]map[string][]source.PackageID) ) // Scan top-level declarations once to collect local import names and // declInfo for each non-import declaration. for _, pgf := range pgfs { file := pgf.File - localImports[file] = make(map[string][]*source.Metadata) + fileImports := make(map[string][]source.PackageID) + localImports[file] = fileImports + for _, d := range file.Decls { switch d := d.(type) { case *ast.GenDecl: @@ -80,28 +82,29 @@ func Refs(pgfs []*source.ParsedGoFile, id source.PackageID, imports map[source.I } name = spec.Name.Name // possibly "." } - localImports[file][name] = append(localImports[file][name], dep) + fileImports[name] = append(fileImports[name], dep.ID) } case token.TYPE: for _, spec := range d.Specs { spec := spec.(*ast.TypeSpec) - if spec.Name.Name == "_" { + name := spec.Name.Name + if name == "_" { continue } - info := decls[spec.Name.Name] // TODO(rfindley): handle the case of duplicate decls. + info := decls[name] // TODO(rfindley): handle the case of duplicate decls. if info == nil { info = &declInfo{} - decls[spec.Name.Name] = info + decls[name] = info } // Sanity check that the root node info has not been set. // // TODO(rfindley): this panic is incorrect in the presence of // invalid code with duplicate decls. if info.node != nil || info.tparams != nil { - panic(fmt.Sprintf("root node already set for %s.%s", id, spec.Name.Name)) + panic(fmt.Sprintf("root node already set for %s.%s", id, name)) } - info.name = spec.Name.Name + info.name = name info.file = file info.node = spec info.tparams = tparamsMap(typeparams.ForTypeSpec(spec)) @@ -172,18 +175,23 @@ func Refs(pgfs []*source.ParsedGoFile, id source.PackageID, imports map[source.I // recordEdge records the (id, name)->(id2, name) edge. recordEdge := func(id2 source.PackageID, name2 string) { - if mappedRefs[name] == nil { - mappedRefs[name] = make(map[source.PackageID]map[string]bool) + pkgRefs, ok := mappedRefs[name] + if !ok { + pkgRefs = make(map[source.PackageID]map[string]bool) + mappedRefs[name] = pkgRefs } - if mappedRefs[name][id2] == nil { - mappedRefs[name][id2] = make(map[string]bool) + names, ok := pkgRefs[id2] + if !ok { + names = make(map[string]bool) + pkgRefs[id2] = names } - mappedRefs[name][id2][name2] = true + names[name2] = true } for _, info := range infos { - localImports := localImports[info.file] + fileImports := localImports[info.file] + // Visit each reference to name or name.sel. visitDeclOrSpec(info.node, func(name, sel string) { if info.tparams[name] { return @@ -215,18 +223,18 @@ func Refs(pgfs []*source.ParsedGoFile, id source.PackageID, imports map[source.I // // Just record edges to both. if token.IsExported(name) { - for _, dep := range localImports["."] { + for _, depID := range fileImports["."] { // Conservatively, assume that this name comes from every // dot-imported package. - recordEdge(dep.ID, name) + recordEdge(depID, name) } } // Similarly, if sel is exported we record an edge for every matching // import. if sel != "" && token.IsExported(sel) { - for _, dep := range localImports[name] { - recordEdge(dep.ID, sel) + for _, depID := range fileImports[name] { + recordEdge(depID, sel) } } }) @@ -242,9 +250,9 @@ func Refs(pgfs []*source.ParsedGoFile, id source.PackageID, imports map[source.I sort.Strings(names) for _, name := range names { fmt.Printf("\t-> %s\n", name) - for id2, names2 := range mappedRefs[name] { + for id2, pkgRefs := range mappedRefs[name] { var ns []string - for n := range names2 { + for n := range pkgRefs { ns = append(ns, n) } sort.Strings(ns) @@ -333,17 +341,31 @@ func visitDeclOrSpec(node ast.Node, f refVisitor) { // visitExpr can't reliably distinguish a dotted ident pkg.X from a // selection expr.f or T.method. func visitExpr(expr ast.Expr, f refVisitor) { - // walk children - // (the order of the cases matches the order - // of the corresponding node types in ast.go) switch n := expr.(type) { - // Expressions - case *ast.BadExpr, *ast.BasicLit: - // nothing to do - + // These four cases account for about two thirds of all nodes, + // so we place them first to shorten the common control paths. + // (See go.dev/cl/480915.) case *ast.Ident: f(n.Name, "") + case *ast.BasicLit: + // nothing to do + + case *ast.SelectorExpr: + if ident, ok := n.X.(*ast.Ident); ok { + f(ident.Name, n.Sel.Name) + } else { + visitExpr(n.X, f) + // Skip n.Sel as we don't care about which field or method is selected, + // as we'll have recorded an edge to all declarations relevant to the + // receiver type via visiting n.X above. + } + + case *ast.CallExpr: + visitExpr(n.Fun, f) + visitExprList(n.Args, f) // args affect types for unsafe.Sizeof or builtins or generics + + // Expressions case *ast.Ellipsis: if n.Elt != nil { visitExpr(n.Elt, f) @@ -362,16 +384,6 @@ func visitExpr(expr ast.Expr, f refVisitor) { case *ast.ParenExpr: visitExpr(n.X, f) - case *ast.SelectorExpr: - if ident, ok := n.X.(*ast.Ident); ok { - f(ident.Name, n.Sel.Name) - } else { - visitExpr(n.X, f) - // Skip n.Sel as we don't care about which field or method is selected, - // as we'll have recorded an edge to all declarations relevant to the - // receiver type via visiting n.X above. - } - case *ast.IndexExpr: visitExpr(n.X, f) visitExpr(n.Index, f) // may affect type for instantiations @@ -393,10 +405,6 @@ func visitExpr(expr ast.Expr, f refVisitor) { visitExpr(n.Type, f) } - case *ast.CallExpr: - visitExpr(n.Fun, f) - visitExprList(n.Args, f) // args affect types for unsafe.Sizeof or builtins or generics - case *ast.StarExpr: visitExpr(n.X, f) @@ -439,6 +447,10 @@ func visitExpr(expr ast.Expr, f refVisitor) { case *ast.ChanType: visitExpr(n.Value, f) + + case *ast.BadExpr: + // nothing to do + default: panic(fmt.Sprintf("ast.Walk: unexpected node type %T", n)) }