Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add suggestions to most parser diagnostics #431

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
155 changes: 153 additions & 2 deletions experimental/parser/diagnostics_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,16 @@
package parser

import (
"fmt"
"unicode"
"unicode/utf8"

"github.com/bufbuild/protocompile/experimental/internal/taxa"
"github.com/bufbuild/protocompile/experimental/report"
"github.com/bufbuild/protocompile/experimental/token"
"github.com/bufbuild/protocompile/internal/ext/iterx"
"github.com/bufbuild/protocompile/internal/ext/slicesx"
"github.com/bufbuild/protocompile/internal/ext/stringsx"
)

// errUnexpected is a low-level parser error for when we hit a token we don't
Expand All @@ -36,6 +44,27 @@ type errUnexpected struct {
// description.
want taxa.Set
got any

// If nonempty, inserting this text will be suggested at the given offset.
insert string
insertAt int
insertJustify int
stream *token.Stream
}

// errUnexpectedEOF is a helper for constructing EOF diagnostics that need to
// provide *no* suggestions. This is used in places where any suggestion we
// could provide would be nonsensical.
// errUnexpectedEOF is a helper for constructing EOF diagnostics that need to
// provide *no* suggestions. This is used in places where any suggestion we
// could provide would be nonsensical.
func errUnexpectedEOF(c *token.Cursor, where taxa.Place) errUnexpected {
	tok, span := c.Clone().SeekToEnd()
	if !tok.IsZero() {
		// There is a real token at the end of the stream; point at it.
		return errUnexpected{what: tok, where: where}
	}
	// Nothing left in the stream: report a bare EOF at the end-of-stream span.
	return errUnexpected{
		what:  span,
		where: where,
		got:   taxa.EOF,
	}
}

func (e errUnexpected) Diagnose(d *report.Diagnostic) {
Expand All @@ -54,16 +83,30 @@ func (e errUnexpected) Diagnose(d *report.Diagnostic) {
message = report.Message("unexpected %v %v", got, e.where)
}

snippet := report.Snippet(e.what)
what := e.what.Span()
snippet := report.Snippet(what)
if e.want.Len() > 0 {
snippet = report.Snippetf(e.what, "expected %v", e.want.Join("or"))
snippet = report.Snippetf(what, "expected %v", e.want.Join("or"))
}

d.Apply(
message,
snippet,
report.Snippetf(e.prev, "previous %v is here", e.where.Subject()),
)

if e.insert != "" {
want, _ := iterx.First(e.want.All())
d.Apply(justify(
e.stream,
what,
fmt.Sprintf("consider inserting a %v", want),
justified{
report.Edit{Replace: e.insert},
e.insertJustify,
},
))
}
}

// errMoreThanOne is used to diagnose the occurrence of some construct more
Expand All @@ -85,3 +128,111 @@ func (e errMoreThanOne) Diagnose(d *report.Diagnostic) {
report.Snippetf(e.first, "first one is here"),
)
}

// Justification modes for justified edits. They describe how an edit's
// position should be adjusted (relative to surrounding whitespace or
// tokens) before it is presented as a suggestion; see justify.
const (
	// justifyNone leaves the edit's offsets untouched.
	justifyNone int = iota
	// justifyBetween shifts the edit (or pads its replacement text with
	// spaces) so that the inserted text ends up surrounded by whitespace.
	justifyBetween
	// justifyRight snaps the edit's end to the start of the next
	// unskippable token in the stream.
	justifyRight
	// justifyLeft snaps the edit's start to the end of the previous
	// unskippable token in the stream.
	justifyLeft
)

// justified is a report.Edit bundled with the justification mode that
// justify should apply to it.
type justified struct {
	report.Edit
	justify int
}

func justify(stream *token.Stream, span report.Span, message string, edits ...justified) report.DiagnosticOption {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is certainly not what I had in mind when you said remove the justification and replace it with helpers. I was thinking of two or three specific helpers for finding the right insertion points, possibly looking backwards in the token stream (for "left").

It seems like the biggest change made was to remove doc comments, which isn't an improvement at all.

Could you at least provide some doc comments that describe why this is useful or necessary along with examples of the various values? It's just not intuitive that this level of complexity is necessary. It seemed to me that a helper to find the end of the previous token for an insert position might have been sufficient, but maybe my imagination is failing me.

(Having said that, I do appreciate that this change means the justify param is no longer in the proto, and instead the suggested edit in the proto has the properly resolved edit locations.)

text := span.File.Text()
for i := range edits {
e := &edits[i]
// Convert the edits to absolute offsets.
e.Start += span.Start
e.End += span.Start
empty := e.Start == e.End

spaceAfter := func(text string, idx int) int {
r, ok := stringsx.Rune(text, idx)
if !ok || !unicode.IsSpace(r) {
return 0
}
return utf8.RuneLen(r)
}
spaceBefore := func(text string, idx int) int {
r, ok := stringsx.PrevRune(text, idx)
if !ok || !unicode.IsSpace(r) {
return 0
}
return utf8.RuneLen(r)
}

switch edits[i].justify {
case justifyBetween:
// If possible, shift the offset such that it is surrounded by
// whitespace. However, this is not always possible, in which case we
// must add whitespace to text.
prev := spaceBefore(text, e.Start)
next := spaceAfter(text, e.End)
switch {
case prev > 0 && next > 0:
// Nothing to do here.

case empty && prev > 0 && spaceBefore(text, e.Start-prev) > 0:
e.Start -= prev
e.End -= prev
case prev > 0:
e.Replace += " "

case empty && next > 0 && spaceAfter(text, e.End+next) > 0:
e.Start += next
e.End += next
case next > 0:
e.Replace = " " + e.Replace

default:
// We're crammed between non-whitespace.
e.Replace = " " + e.Replace + " "
}

case justifyLeft:
// Get the token at the start of the span.
start, end := stream.Around(e.Start)
if end.IsZero() {
break
}

c := token.NewCursorAt(start)
// Seek to the previous unskippable token, and use its end as
// the start of the justification.
e.Start = c.Prev().Span().End
if empty {
e.End = e.Start
}

case justifyRight:
// Identical to the above, but reversed.
start, end := stream.Around(e.Start)
if start.IsZero() {
break
}

c := token.NewCursorAt(end)
e.End = c.Next().Span().Start
if empty {
e.Start = e.End
}
}
}

span = report.JoinSeq(iterx.Map(slicesx.Values(edits), func(j justified) report.Span {
return span.File.Span(j.Start, j.End)
}))
return report.SuggestEdits(
span, message,
slicesx.Collect(iterx.Map(slicesx.Values(edits),
func(j justified) report.Edit {
j.Start -= span.Start
j.End -= span.Start
return j.Edit
}))...,
)
}
26 changes: 19 additions & 7 deletions experimental/parser/lex_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func lexString(l *lexer) token.Token {
}

cursor := l.cursor
sc := lexStringContent(l)
sc := lexStringContent(quote, l)
if sc.isEscape && !haveEsc {
// If we saw our first escape, spill the string into the buffer
// up to just before the escape.
Expand Down Expand Up @@ -79,14 +79,20 @@ type stringContent struct {

// lexStringContent lexes a single logical rune's worth of content for a quoted
// string.
func lexStringContent(l *lexer) (sc stringContent) {
func lexStringContent(_ rune, l *lexer) (sc stringContent) {
start := l.cursor
r := l.Pop()

switch {
case r == 0:
esc := l.SpanFrom(l.cursor - utf8.RuneLen(r))
l.Errorf("unescaped NUL bytes are not permitted in string literals").Apply(
report.Snippetf(l.SpanFrom(l.cursor-utf8.RuneLen(r)), "replace this with `\\0` or `\\x00`"),
report.Snippet(esc),
report.SuggestEdits(esc, "replace it with `\\0` or `\\x00`", report.Edit{
Start: 0,
End: 1,
Replace: "\\0",
}),
)
case r == '\n':
// TODO: This diagnostic is simply user-hostile. We should remove it.
Expand All @@ -98,10 +104,10 @@ func lexStringContent(l *lexer) (sc stringContent) {
// Many programming languages have since thoughtlessly copied this
// choice, including Protobuf, whose lexical morphology is almost
// exactly C's).
nl := l.SpanFrom(l.cursor - utf8.RuneLen(r))
l.Errorf("unescaped newlines are not permitted in string literals").Apply(
// Not to mention, this diagnostic is not ideal: we should probably
// tell users to split the string into multiple quoted fragments.
report.Snippetf(l.SpanFrom(l.cursor-utf8.RuneLen(r)), "replace this with `\\n`"),
report.Snippet(nl),
report.Helpf("consider splitting this into two adjacent string literals"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There may be more than one newline, so hard-coding a count of two adjacent literals doesn't seem right.

Suggested change
report.Helpf("consider splitting this into two adjacent string literals"),
report.Helpf("consider splitting this into adjacent string literals"),

)
case report.NonPrint(r):
// Warn if the user has a non-printable character in their string that isn't
Expand All @@ -116,8 +122,14 @@ func lexStringContent(l *lexer) (sc stringContent) {
escape = fmt.Sprintf(`\U%08x`, r)
}

esc := l.SpanFrom(l.cursor - utf8.RuneLen(r))
l.Warnf("non-printable character in string literal").Apply(
report.Snippetf(l.SpanFrom(l.cursor-utf8.RuneLen(r)), "help: consider escaping this with e.g. `%s` instead", escape),
report.Snippet(esc),
report.SuggestEdits(esc, "consider escaping it", report.Edit{
Start: 0,
End: len(esc.Text()),
Replace: escape,
}),
)
}

Expand Down
Loading
Loading