Skip to content

Commit

Permalink
feat(ansi): add method type for cell width calculation
Browse files Browse the repository at this point in the history
This adds a new type `Method` to the `ansi` package that represents how
to calculate the cell widths in the terminal. The default is to use
`GraphemeWidth`, which keeps the current behavior. To use `WcWidth`
instead, call the corresponding method on the `WcWidth` value.

```go
ansi.StringWidth("👩‍👩‍👧‍👧") // 2
ansi.GraphemeWidth.StringWidth("👩‍👩‍👧‍👧") // 2
ansi.WcWidth.StringWidth("👩‍👩‍👧‍👧") // 6
```
  • Loading branch information
aymanbagabas committed Oct 17, 2024
1 parent 0e84032 commit 6718bbd
Show file tree
Hide file tree
Showing 11 changed files with 221 additions and 49 deletions.
7 changes: 6 additions & 1 deletion ansi/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@ module github.com/charmbracelet/x/ansi

go 1.18

require github.com/rivo/uniseg v0.4.7
require (
github.com/charmbracelet/x/wcwidth v0.0.0-20241017213443-f2394f742aee
github.com/rivo/uniseg v0.4.7
)

require golang.org/x/text v0.19.0 // indirect
4 changes: 4 additions & 0 deletions ansi/go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
github.com/charmbracelet/x/wcwidth v0.0.0-20241017213443-f2394f742aee h1:pPwlYuFaYNPhFRqbwEqlRyq5PwLdLInR4zAlh9e7ad0=
github.com/charmbracelet/x/wcwidth v0.0.0-20241017213443-f2394f742aee/go.mod h1:Ey8PFmYwH+/td9bpiEx07Fdx9ZVkxfIjWXxBluxF4Nw=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
13 changes: 13 additions & 0 deletions ansi/method.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package ansi

// Method represents how the cell (display) width of text is calculated in
// the terminal. The zero value of Method is [WcWidth], while the
// package-level helpers such as [StringWidth] delegate to [GraphemeWidth].
// Some terminals use grapheme clustering by default. Some support mode 2027
// to tell the terminal to use grapheme clustering instead of wcwidth.
type Method uint8

// Display width modes.
const (
// WcWidth measures each grapheme cluster with the wcwidth package. It is
// the zero value of [Method].
WcWidth Method = iota
// GraphemeWidth uses the width that uniseg reports for each grapheme
// cluster.
GraphemeWidth
)
112 changes: 111 additions & 1 deletion ansi/parser_decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"unicode/utf8"

"github.com/charmbracelet/x/ansi/parser"
"github.com/charmbracelet/x/wcwidth"
"github.com/rivo/uniseg"
)

Expand Down Expand Up @@ -67,7 +68,113 @@ const (
// state = newState
// input = input[n:]
// }
func DecodeSequence[T string | []byte](b T, state byte, p *Parser) (seq T, width int, n int, newState byte) {
func DecodeSequence(b []byte, state byte, p *Parser) (seq []byte, width int, n int, newState byte) {
	// The package-level decoder delegates to the GraphemeWidth method.
	seq, width, n, newState = GraphemeWidth.DecodeSequence(b, state, p)
	return seq, width, n, newState
}

// DecodeSequenceInString is the string variant of [DecodeSequence]. It
// delegates to [Method.DecodeSequenceInString] on [GraphemeWidth].
func DecodeSequenceInString(s string, state byte, p *Parser) (seq string, width int, n int, newState byte) {
	seq, width, n, newState = GraphemeWidth.DecodeSequenceInString(s, state, p)
	return seq, width, n, newState
}

// DecodeSequence decodes the first ANSI escape sequence or printable grapheme
// cluster found in b. It returns the decoded sequence, its cell width, the
// number of bytes read, and the new parser state.
//
// The cell width is always 0 for control and escape sequences and 1 for ASCII
// printable characters. Other Unicode text is split into grapheme clusters;
// the width of a cluster is the one reported by the uniseg package, unless m
// is [WcWidth], in which case the cluster is measured with the wcwidth
// package instead.
//
// Passing a non-nil [*Parser] as the last argument allows the decoder to
// collect sequence parameters, data, and commands. The parser cmd will hold
// the packed command value that contains intermediate and marker characters.
// For an OSC sequence, the cmd will be the OSC command number. Use the [Cmd]
// and [Param] types to unpack command intermediates and markers as well as
// parameters.
//
// A zero [p.Cmd] means the CSI, DCS, or ESC sequence is invalid. Checking the
// validity of other data sequences (OSC, DCS, etc.) additionally requires
// checking the returned sequence for terminator bytes such as ST (ESC \\) and
// BEL.
//
// The command, marker, and intermediate bytes are packed into [p.Cmd] to
// avoid using a struct. The command byte is always the least significant
// byte, i.e. [p.Cmd & 0xff]; the marker byte is stored in the next byte and
// the intermediate byte in the byte after that. Use the [Cmd] type to unpack
// them. Note that only the last marker character and intermediate byte are
// collected.
//
// The [p.Params] slice will contain the parameters of the sequence. Any
// sub-parameter will have the [parser.HasMoreFlag] set. Use the [Param] type
// to unpack the parameters.
//
// Example:
//
//	var state byte // the initial state is always zero [NormalState]
//	p := NewParser(32, 1024) // create a new parser with a 32 params buffer and 1024 data buffer (optional)
//	input := []byte("\x1b[31mHello, World!\x1b[0m")
//	for len(input) > 0 {
//		seq, width, n, newState := WcWidth.DecodeSequence(input, state, p)
//		log.Printf("seq: %q, width: %d", seq, width)
//		state = newState
//		input = input[n:]
//	}
func (m Method) DecodeSequence(b []byte, state byte, p *Parser) (seq []byte, width int, n int, newState byte) {
	// The generic helper serves both the []byte and string entry points.
	seq, width, n, newState = decodeSequence(m, b, state, p)
	return seq, width, n, newState
}

// DecodeSequenceInString is the string counterpart of
// [Method.DecodeSequence]: it decodes the first sequence or grapheme cluster
// in s, using m to compute the cell width.
func (m Method) DecodeSequenceInString(s string, state byte, p *Parser) (seq string, width int, n int, newState byte) {
	seq, width, n, newState = decodeSequence(m, s, state, p)
	return seq, width, n, newState
}

// decodeSequence decodes the first ANSI escape sequence or a printable
// grapheme from the given data. It returns the sequence slice, the number of
// bytes read, the cell width for each sequence, and the new state.
//
// The cell width will always be 0 for control and escape sequences, 1 for
// ASCII printable characters, and the number of cells other Unicode characters
// occupy. Unicode text is always split into grapheme clusters; the width of a
// cluster is the one reported by the uniseg package, unless m is [WcWidth],
// in which case the cluster is measured with the wcwidth package instead.
//
// Passing a non-nil [*Parser] as the last argument will allow the decoder to
// collect sequence parameters, data, and commands. The parser cmd will have
// the packed command value that contains intermediate and marker characters.
// In the case of a OSC sequence, the cmd will be the OSC command number. Use
// [Cmd] and [Param] types to unpack command intermediates and markers as well
// as parameters.
//
// Zero [p.Cmd] means the CSI, DCS, or ESC sequence is invalid. Moreover, checking the
// validity of other data sequences, OSC, DCS, etc, will require checking for
// the returned sequence terminator bytes such as ST (ESC \\) and BEL).
//
// The command, marker, and intermediate bytes are packed into [p.Cmd] to avoid
// using a struct to store the command and its intermediates and markers. The
// command byte is always the least significant byte i.e. [p.Cmd & 0xff], the
// marker byte is stored in the next byte, and the intermediate byte in the
// byte after that. Use the [Cmd] type to unpack the command, intermediate, and
// marker bytes. Note that we only collect the last marker character and
// intermediate byte.
//
// The [p.Params] slice will contain the parameters of the sequence. Any
// sub-parameter will have the [parser.HasMoreFlag] set. Use the [Param] type
// to unpack the parameters.
//
// Example:
//
// var state byte // the initial state is always zero [NormalState]
// p := NewParser(32, 1024) // create a new parser with a 32 params buffer and 1024 data buffer (optional)
// input := []byte("\x1b[31mHello, World!\x1b[0m")
// for len(input) > 0 {
// seq, width, n, newState := DecodeSequence(input, state, p)
// log.Printf("seq: %q, width: %d", seq, width)
// state = newState
// input = input[n:]
// }
func decodeSequence[T string | []byte](m Method, b T, state byte, p *Parser) (seq T, width int, n int, newState byte) {
for i := 0; i < len(b); i++ {
c := b[i]

Expand Down Expand Up @@ -122,6 +229,9 @@ func DecodeSequence[T string | []byte](b T, state byte, p *Parser) (seq T, width

if utf8.RuneStart(c) {
seq, _, width, _ = FirstGraphemeCluster(b, -1)
if m == WcWidth {
width = wcwidth.StringWidth(string(seq))
}
i += len(seq)
return b[:i], width, i, NormalState
}
Expand Down
12 changes: 12 additions & 0 deletions ansi/truncate.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"

"github.com/charmbracelet/x/ansi/parser"
"github.com/charmbracelet/x/wcwidth"
"github.com/rivo/uniseg"
)

Expand All @@ -12,6 +13,14 @@ import (
// This function is aware of ANSI escape codes and will not break them, and
// accounts for wide-characters (such as East Asians and emojis).
func Truncate(s string, length int, tail string) string {
	// The package-level helper delegates to the GraphemeWidth method.
	truncated := GraphemeWidth.Truncate(s, length, tail)
	return truncated
}

// Truncate truncates a string to a given length, adding a tail to the
// end if the string is longer than the given length.
// This function is aware of ANSI escape codes and will not break them, and
// accounts for wide-characters (such as East Asians and emojis).
func (m Method) Truncate(s string, length int, tail string) string {
if sw := StringWidth(s); sw <= length {
return s
}
Expand Down Expand Up @@ -41,6 +50,9 @@ func Truncate(s string, length int, tail string) string {
// This action happens when we transition to the Utf8State.
var width int
cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
if m == WcWidth {
width = wcwidth.StringWidth(string(cluster))
}

// increment the index by the length of the cluster
i += len(cluster)
Expand Down
16 changes: 15 additions & 1 deletion ansi/width.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"

"github.com/charmbracelet/x/ansi/parser"
"github.com/charmbracelet/x/wcwidth"
"github.com/rivo/uniseg"
)

Expand Down Expand Up @@ -63,6 +64,14 @@ func Strip(s string) string {
// codes are ignored and wide characters (such as East Asians and emojis) are
// accounted for.
func StringWidth(s string) int {
	// The package-level helper delegates to the GraphemeWidth method.
	cells := GraphemeWidth.StringWidth(s)
	return cells
}

// StringWidth returns the width of a string in cells. This is the number of
// cells that the string will occupy when printed in a terminal. ANSI escape
// codes are ignored and wide characters (such as East Asians and emojis) are
// accounted for.
func (m Method) StringWidth(s string) int {
if s == "" {
return 0
}
Expand All @@ -78,7 +87,12 @@ func StringWidth(s string) int {
if state == parser.Utf8State {
var w int
cluster, _, w, _ = uniseg.FirstGraphemeClusterInString(s[i:], -1)
width += w
switch m {
case WcWidth:
width += wcwidth.StringWidth(cluster)
case GraphemeWidth:
width += w
}
i += len(cluster) - 1
pstate = parser.GroundState
continue
Expand Down
2 changes: 1 addition & 1 deletion ansi/width_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func TestStrip(t *testing.T) {
func TestStringWidth(t *testing.T) {
for i, c := range cases {
t.Run(c.name, func(t *testing.T) {
if width := StringWidth(c.input); width != c.width {
if width := GraphemeWidth.StringWidth(c.input); width != c.width {
t.Errorf("test case %d failed: expected %d, got %d", i+1, c.width, width)
}
})
Expand Down
50 changes: 48 additions & 2 deletions ansi/wrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"unicode/utf8"

"github.com/charmbracelet/x/ansi/parser"
"github.com/charmbracelet/x/wcwidth"
"github.com/rivo/uniseg"
)

Expand All @@ -18,6 +19,15 @@ const nbsp = 0xA0
// When preserveSpace is true, spaces at the beginning of a line will be
// preserved.
func Hardwrap(s string, limit int, preserveSpace bool) string {
	// The package-level helper delegates to the GraphemeWidth method.
	wrapped := GraphemeWidth.Hardwrap(s, limit, preserveSpace)
	return wrapped
}

// Hardwrap wraps a string or a block of text to a given line length, breaking
// word boundaries. This will preserve ANSI escape codes and will account for
// wide-characters in the string.
// When preserveSpace is true, spaces at the beginning of a line will be
// preserved.
func (m Method) Hardwrap(s string, limit int, preserveSpace bool) string {
if limit < 1 {
return s
}
Expand Down Expand Up @@ -56,7 +66,12 @@ func Hardwrap(s string, limit int, preserveSpace bool) string {
}

buf.Write(cluster)
curWidth += width
switch m {
case WcWidth:
curWidth += wcwidth.StringWidth(string(cluster))
case GraphemeWidth:
curWidth += width
}
pstate = parser.GroundState
continue
}
Expand Down Expand Up @@ -107,6 +122,18 @@ func Hardwrap(s string, limit int, preserveSpace bool) string {
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func Wordwrap(s string, limit int, breakpoints string) string {
	// The package-level helper delegates to the GraphemeWidth method.
	wrapped := GraphemeWidth.Wordwrap(s, limit, breakpoints)
	return wrapped
}

// Wordwrap wraps a string or a block of text to a given line length, not
// breaking word boundaries. This will preserve ANSI escape codes and will
// account for wide-characters in the string.
// The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func (m Method) Wordwrap(s string, limit int, breakpoints string) string {
if limit < 1 {
return s
}
Expand Down Expand Up @@ -165,7 +192,12 @@ func Wordwrap(s string, limit int, breakpoints string) string {
curWidth++
} else {
word.Write(cluster)
wordLen += width
switch m {
case WcWidth:
wordLen += wcwidth.StringWidth(string(cluster))
case GraphemeWidth:
wordLen += width
}
if curWidth+space.Len()+wordLen > limit &&
wordLen < limit {
addNewline()
Expand Down Expand Up @@ -235,6 +267,17 @@ func Wordwrap(s string, limit int, breakpoints string) string {
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func Wrap(s string, limit int, breakpoints string) string {
	// The package-level helper delegates to the GraphemeWidth method.
	wrapped := GraphemeWidth.Wrap(s, limit, breakpoints)
	return wrapped
}

// Wrap wraps a string or a block of text to a given line length, breaking word
// boundaries if necessary. This will preserve ANSI escape codes and will
// account for wide-characters in the string. The breakpoints string is a list
// of characters that are considered breakpoints for word wrapping. A hyphen
// (-) is always considered a breakpoint.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
func (m Method) Wrap(s string, limit int, breakpoints string) string {
if limit < 1 {
return s
}
Expand Down Expand Up @@ -280,6 +323,9 @@ func Wrap(s string, limit int, breakpoints string) string {
if state == parser.Utf8State {
var width int
cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
if m == WcWidth {
width = wcwidth.StringWidth(string(cluster))
}
i += len(cluster)

r, _ := utf8.DecodeRune(cluster)
Expand Down
9 changes: 4 additions & 5 deletions cellbuf/grid.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package cellbuf
import (
"bytes"
"strings"
"unicode/utf8"

"github.com/charmbracelet/x/ansi"
)
Expand Down Expand Up @@ -34,13 +33,13 @@ type Grid interface {

// SetContentAt writes the given data to the grid starting from the given
// position and with the given width and height.
func (m WidthMethod) SetContentAt(b Grid, c string, x, y, w, h int) []int {
return setContent(b, c, x, y, w, h, m, strings.ReplaceAll, utf8.DecodeRuneInString)
func SetContentAt(m ansi.Method, b Grid, c string, x, y, w, h int) []int {
	// Forward to the unexported implementation, which takes the width method
	// as its last argument.
	out := setContent(b, c, x, y, w, h, m)
	return out
}

// SetContent writes the given data to the grid starting from the first cell.
func (m WidthMethod) SetContent(g Grid, content string) []int {
return m.SetContentAt(g, content, 0, 0, g.Width(), Height(content))
func SetContent(m ansi.Method, g Grid, content string) []int {
	// Start at the origin, spanning the grid's width and Height(content) rows.
	width, height := g.Width(), Height(content)
	return SetContentAt(m, g, content, 0, 0, width, height)
}

// Render returns a string representation of the grid with ANSI escape sequences.
Expand Down
Loading

0 comments on commit 6718bbd

Please sign in to comment.