Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add suggestions to most parser diagnostics #431

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
155 changes: 153 additions & 2 deletions experimental/parser/diagnostics_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,16 @@
package parser

import (
"fmt"
"unicode"
"unicode/utf8"

"github.com/bufbuild/protocompile/experimental/internal/taxa"
"github.com/bufbuild/protocompile/experimental/report"
"github.com/bufbuild/protocompile/experimental/token"
"github.com/bufbuild/protocompile/internal/ext/iterx"
"github.com/bufbuild/protocompile/internal/ext/slicesx"
"github.com/bufbuild/protocompile/internal/ext/stringsx"
)

// errUnexpected is a low-level parser error for when we hit a token we don't
Expand All @@ -36,6 +44,27 @@ type errUnexpected struct {
// description.
want taxa.Set
got any

// If nonempty, inserting this text will be suggested at the given offset.
insert string
insertAt int
insertJustify int
stream *token.Stream
}

// errUnexpectedEOF is a helper for constructing EOF diagnostics that need to
// provide *no* suggestions. This is used in places where any suggestion we
// could provide would be nonsensical.
// errUnexpectedEOF is a helper for constructing EOF diagnostics that need to
// provide *no* suggestions. This is used in places where any suggestion we
// could provide would be nonsensical.
func errUnexpectedEOF(c *token.Cursor, where taxa.Place) errUnexpected {
	tok, span := c.Clone().SeekToEnd()
	if !tok.IsZero() {
		// There is a real token at the end of the stream; point at it.
		return errUnexpected{what: tok, where: where}
	}
	// Nothing left in the stream: report a bare EOF at the end-of-stream span.
	return errUnexpected{
		what:  span,
		where: where,
		got:   taxa.EOF,
	}
}

func (e errUnexpected) Diagnose(d *report.Diagnostic) {
Expand All @@ -54,16 +83,30 @@ func (e errUnexpected) Diagnose(d *report.Diagnostic) {
message = report.Message("unexpected %v %v", got, e.where)
}

snippet := report.Snippet(e.what)
what := e.what.Span()
snippet := report.Snippet(what)
if e.want.Len() > 0 {
snippet = report.Snippetf(e.what, "expected %v", e.want.Join("or"))
snippet = report.Snippetf(what, "expected %v", e.want.Join("or"))
}

d.Apply(
message,
snippet,
report.Snippetf(e.prev, "previous %v is here", e.where.Subject()),
)

if e.insert != "" {
want, _ := iterx.First(e.want.All())
d.Apply(justify(
e.stream,
what,
fmt.Sprintf("consider inserting a %v", want),
justified{
report.Edit{Replace: e.insert},
e.insertJustify,
},
))
}
}

// errMoreThanOne is used to diagnose the occurrence of some construct more
Expand All @@ -85,3 +128,111 @@ func (e errMoreThanOne) Diagnose(d *report.Diagnostic) {
report.Snippetf(e.first, "first one is here"),
)
}

// Justification modes for justified edits. They describe how an edit's
// position should be adjusted (relative to surrounding whitespace or
// tokens) before it is presented as a suggestion; see justify.
const (
	// justifyNone leaves the edit's offsets untouched.
	justifyNone int = iota
	// justifyBetween shifts the edit (or pads its replacement text with
	// spaces) so that the inserted text ends up surrounded by whitespace.
	justifyBetween
	// justifyRight snaps the edit's end to the start of the next
	// unskippable token in the stream.
	justifyRight
	// justifyLeft snaps the edit's start to the end of the previous
	// unskippable token in the stream.
	justifyLeft
)

// justified is a report.Edit bundled with the justification mode that
// justify should apply to it.
type justified struct {
	report.Edit
	justify int
}

func justify(stream *token.Stream, span report.Span, message string, edits ...justified) report.DiagnosticOption {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is certainly not what I had in mind when you said remove the justification and replace it with helpers. I was thinking of two or three specific helpers for finding the right insertion points, possibly looking backwards in the token stream (for "left").

It seems like the biggest change made was to remove doc comments, which isn't an improvement at all.

Could you at least provide some doc comments that describe why this is useful or necessary along with examples of the various values? It's just not intuitive that this level of complexity is necessary. It seemed to me that a helper to find the end of the previous token for an insert position might have been sufficient, but maybe my imagination is failing me.

(Having said that, I do appreciate that this change means the justify param is no longer in the proto, and instead the suggested edit in the proto has the properly resolved edit locations.)

text := span.File.Text()
for i := range edits {
e := &edits[i]
// Convert the edits to absolute offsets.
e.Start += span.Start
e.End += span.Start
empty := e.Start == e.End

spaceAfter := func(text string, idx int) int {
r, ok := stringsx.Rune(text, idx)
if !ok || !unicode.IsSpace(r) {
return 0
}
return utf8.RuneLen(r)
}
spaceBefore := func(text string, idx int) int {
r, ok := stringsx.PrevRune(text, idx)
if !ok || !unicode.IsSpace(r) {
return 0
}
return utf8.RuneLen(r)
}

switch edits[i].justify {
case justifyBetween:
// If possible, shift the offset such that it is surrounded by
// whitespace. However, this is not always possible, in which case we
// must add whitespace to text.
prev := spaceBefore(text, e.Start)
next := spaceAfter(text, e.End)
switch {
case prev > 0 && next > 0:
// Nothing to do here.

case empty && prev > 0 && spaceBefore(text, e.Start-prev) > 0:
e.Start -= prev
e.End -= prev
case prev > 0:
e.Replace += " "

case empty && next > 0 && spaceAfter(text, e.End+next) > 0:
e.Start += next
e.End += next
case next > 0:
e.Replace = " " + e.Replace

default:
// We're crammed between non-whitespace.
e.Replace = " " + e.Replace + " "
}

case justifyLeft:
// Get the token at the start of the span.
start, end := stream.Around(e.Start)
if end.IsZero() {
break
}

c := token.NewCursorAt(start)
// Seek to the previous unskippable token, and use its end as
// the start of the justification.
e.Start = c.Prev().Span().End
if empty {
e.End = e.Start
}

case justifyRight:
// Identical to the above, but reversed.
start, end := stream.Around(e.Start)
if start.IsZero() {
break
}

c := token.NewCursorAt(end)
e.End = c.Next().Span().Start
if empty {
e.Start = e.End
}
}
}

span = report.JoinSeq(iterx.Map(slicesx.Values(edits), func(j justified) report.Span {
return span.File.Span(j.Start, j.End)
}))
return report.SuggestEdits(
span, message,
slicesx.Collect(iterx.Map(slicesx.Values(edits),
func(j justified) report.Edit {
j.Start -= span.Start
j.End -= span.Start
return j.Edit
}))...,
)
}
26 changes: 19 additions & 7 deletions experimental/parser/lex_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func lexString(l *lexer) token.Token {
}

cursor := l.cursor
sc := lexStringContent(l)
sc := lexStringContent(quote, l)
if sc.isEscape && !haveEsc {
// If we saw our first escape, spill the string into the buffer
// up to just before the escape.
Expand Down Expand Up @@ -79,14 +79,20 @@ type stringContent struct {

// lexStringContent lexes a single logical rune's worth of content for a quoted
// string.
func lexStringContent(l *lexer) (sc stringContent) {
func lexStringContent(_ rune, l *lexer) (sc stringContent) {
start := l.cursor
r := l.Pop()

switch {
case r == 0:
esc := l.SpanFrom(l.cursor - utf8.RuneLen(r))
l.Errorf("unescaped NUL bytes are not permitted in string literals").Apply(
report.Snippetf(l.SpanFrom(l.cursor-utf8.RuneLen(r)), "replace this with `\\0` or `\\x00`"),
report.Snippet(esc),
report.SuggestEdits(esc, "replace it with `\\0` or `\\x00`", report.Edit{
Start: 0,
End: 1,
Replace: "\\0",
}),
)
case r == '\n':
// TODO: This diagnostic is simply user-hostile. We should remove it.
Expand All @@ -98,10 +104,10 @@ func lexStringContent(l *lexer) (sc stringContent) {
// Many programming languages have since thoughtlessly copied this
// choice, including Protobuf, whose lexical morphology is almost
// exactly C's).
nl := l.SpanFrom(l.cursor - utf8.RuneLen(r))
l.Errorf("unescaped newlines are not permitted in string literals").Apply(
// Not to mention, this diagnostic is not ideal: we should probably
// tell users to split the string into multiple quoted fragments.
report.Snippetf(l.SpanFrom(l.cursor-utf8.RuneLen(r)), "replace this with `\\n`"),
report.Snippet(nl),
report.Helpf("consider splitting this into two adjacent string literals"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There may be more than one newline, so hard-coding a count of two adjacent literals doesn't seem right.

Suggested change
report.Helpf("consider splitting this into two adjacent string literals"),
report.Helpf("consider splitting this into adjacent string literals"),

)
case report.NonPrint(r):
// Warn if the user has a non-printable character in their string that isn't
Expand All @@ -116,8 +122,14 @@ func lexStringContent(l *lexer) (sc stringContent) {
escape = fmt.Sprintf(`\U%08x`, r)
}

esc := l.SpanFrom(l.cursor - utf8.RuneLen(r))
l.Warnf("non-printable character in string literal").Apply(
report.Snippetf(l.SpanFrom(l.cursor-utf8.RuneLen(r)), "help: consider escaping this with e.g. `%s` instead", escape),
report.Snippet(esc),
report.SuggestEdits(esc, "consider escaping it", report.Edit{
Start: 0,
End: len(esc.Text()),
Replace: escape,
}),
)
}

Expand Down
Loading
Loading