Skip to content

Commit

Permalink
fix charset conversion warning/error
Browse files Browse the repository at this point in the history
Signed-off-by: Yang Keao <[email protected]>
  • Loading branch information
YangKeao committed Feb 20, 2024
1 parent e3e0f7e commit 7d7790a
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 33 deletions.
42 changes: 18 additions & 24 deletions pkg/expression/builtin_convert_charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ package expression
import (
"bytes"
"fmt"
"strings"
"unicode"

"github.com/pingcap/tidb/pkg/errno"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
Expand Down Expand Up @@ -177,8 +174,14 @@ func (b *builtinInternalFromBinarySig) evalString(ctx EvalContext, row chunk.Row
valBytes := hack.Slice(val)
ret, err := enc.Transform(nil, valBytes, charset.OpDecode)
if err != nil {
strHex := formatInvalidChars(valBytes)
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())
strHex := charset.FormatInvalidChars(valBytes)
err := errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

tc := typeCtx(ctx)
tc.AppendWarning(err)
if sqlMode(ctx).HasStrictMode() {
return "", true, nil
}
}
return string(ret), false, err
}
Expand All @@ -200,6 +203,7 @@ func (b *builtinInternalFromBinarySig) vecEvalString(ctx EvalContext, input *chu
enc := charset.FindEncoding(b.tp.GetCharset())
encodedBuf := &bytes.Buffer{}
result.ReserveString(n)
hasStrictMode := sqlMode(ctx).HasStrictMode()
for i := 0; i < n; i++ {
if buf.IsNull(i) {
result.AppendNull()
Expand All @@ -208,8 +212,15 @@ func (b *builtinInternalFromBinarySig) vecEvalString(ctx EvalContext, input *chu
str := buf.GetBytes(i)
val, err := enc.Transform(encodedBuf, str, charset.OpDecode)
if err != nil {
strHex := formatInvalidChars(str)
return errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())
strHex := charset.FormatInvalidChars(str)
err := errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

tc := typeCtx(ctx)
tc.AppendWarning(err)
if hasStrictMode {
result.AppendNull()
continue
}
}
result.AppendBytes(val)
}
Expand Down Expand Up @@ -339,20 +350,3 @@ func isLegacyCharset(chs string) bool {
}
return false
}

// formatInvalidChars renders src for inclusion in an error message.
// ASCII bytes are copied through unchanged and every byte above
// unicode.MaxASCII is written as an upper-case `\xNN` escape. Only the bytes
// at indexes 0..maxBytesToShow are rendered; if src is longer than that the
// output is terminated with "...".
func formatInvalidChars(src []byte) string {
	const maxBytesToShow = 5
	var out strings.Builder
	for idx, b := range src {
		if idx > maxBytesToShow {
			// Anything past the preview window is summarised by an ellipsis.
			out.WriteString("...")
			break
		}
		if b <= unicode.MaxASCII {
			out.WriteByte(b)
			continue
		}
		fmt.Fprintf(&out, "\\x%X", b)
	}
	return out.String()
}
1 change: 1 addition & 0 deletions pkg/parser/ast/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -3837,6 +3837,7 @@ type TextString struct {

// BinaryLiteral is the interface through which the ast package accesses the
// content of a binary literal value, either as a string or as raw bytes.
type BinaryLiteral interface {
// ToString returns the literal's content as a string.
ToString() string
// ToBytes returns the literal's content as a byte slice.
ToBytes() []byte
}

// NewDecimal creates a types.Decimal value, it's provided by parser driver.
Expand Down
25 changes: 24 additions & 1 deletion pkg/parser/charset/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@

package charset

import "bytes"
import (
"bytes"
"fmt"
"strings"
"unicode"
)

// Make sure all of them implement Encoding interface.
var (
Expand Down Expand Up @@ -157,3 +162,21 @@ func CountValidBytesDecode(e Encoding, src []byte) int {
})
return nSrc
}

// FormatInvalidChars renders src in a readable form for error/warning
// messages. ASCII bytes are emitted verbatim, while any byte greater than
// unicode.MaxASCII is emitted as an upper-case `\xNN` escape. At most
// maxBytesToShow+1 leading bytes are rendered; longer inputs are cut off and
// suffixed with "...".
//
// NOTE(review): the constant is named maxBytesToShow = 5 but six bytes are
// emitted. Confirm which is intended before changing it, since the resulting
// message text may be matched elsewhere.
func FormatInvalidChars(src []byte) string {
	const maxBytesToShow = 5

	// Decide up front how much of the input is previewed.
	preview := src
	truncated := len(src) > maxBytesToShow+1
	if truncated {
		preview = src[:maxBytesToShow+1]
	}

	var sb strings.Builder
	for _, c := range preview {
		switch {
		case c > unicode.MaxASCII:
			fmt.Fprintf(&sb, "\\x%X", c)
		default:
			sb.WriteByte(c)
		}
	}
	if truncated {
		sb.WriteString("...")
	}
	return sb.String()
}
24 changes: 20 additions & 4 deletions pkg/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -17444,36 +17444,52 @@ yynewstate:
}
case 1344:
{
co, err := charset.GetDefaultCollationLegacy(yyS[yypt-1].ident)
cs := strings.ToLower(yyS[yypt-1].ident)
co, err := charset.GetDefaultCollationLegacy(cs)
if err != nil {
yylex.AppendError(ast.ErrUnknownCharacterSet.GenWithStack("Unsupported character introducer: '%-.64s'", yyS[yypt-1].ident))
return 1
}
expr := ast.NewValueExpr(yyS[yypt-0].item, yyS[yypt-1].ident, co)
expr := ast.NewValueExpr(yyS[yypt-0].item, cs, co)
tp := expr.GetType()
tp.SetCharset(yyS[yypt-1].ident)
tp.SetCollate(co)
tp.AddFlag(mysql.UnderScoreCharsetFlag)
if tp.GetCollate() == charset.CollationBin {
tp.AddFlag(mysql.BinaryFlag)
}

val := expr.GetValue().(ast.BinaryLiteral).ToBytes()
encoding := charset.FindEncoding(yyS[yypt-1].ident)
if !encoding.IsValid(val) {
yylex.AppendError(charset.ErrInvalidCharacterString.GenWithStackByArgs(yyS[yypt-1].ident, charset.FormatInvalidChars(val)))
return 1
}
parser.yyVAL.expr = expr
}
case 1345:
{
co, err := charset.GetDefaultCollationLegacy(yyS[yypt-1].ident)
cs := strings.ToLower(yyS[yypt-1].ident)
co, err := charset.GetDefaultCollationLegacy(cs)
if err != nil {
yylex.AppendError(ast.ErrUnknownCharacterSet.GenWithStack("Unsupported character introducer: '%-.64s'", yyS[yypt-1].ident))
return 1
}
expr := ast.NewValueExpr(yyS[yypt-0].item, yyS[yypt-1].ident, co)
expr := ast.NewValueExpr(yyS[yypt-0].item, cs, co)
tp := expr.GetType()
tp.SetCharset(yyS[yypt-1].ident)
tp.SetCollate(co)
tp.AddFlag(mysql.UnderScoreCharsetFlag)
if tp.GetCollate() == charset.CollationBin {
tp.AddFlag(mysql.BinaryFlag)
}

val := expr.GetValue().(ast.BinaryLiteral).ToBytes()
encoding := charset.FindEncoding(yyS[yypt-1].ident)
if !encoding.IsValid(val) {
yylex.AppendError(charset.ErrInvalidCharacterString.GenWithStackByArgs(yyS[yypt-1].ident, charset.FormatInvalidChars(val)))
return 1
}
parser.yyVAL.expr = expr
}
case 1346:
Expand Down
24 changes: 20 additions & 4 deletions pkg/parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -7368,36 +7368,52 @@ Literal:
}
| "UNDERSCORE_CHARSET" hexLit
{
co, err := charset.GetDefaultCollationLegacy($1)
cs := strings.ToLower($1)
co, err := charset.GetDefaultCollationLegacy(cs)
if err != nil {
yylex.AppendError(ast.ErrUnknownCharacterSet.GenWithStack("Unsupported character introducer: '%-.64s'", $1))
return 1
}
expr := ast.NewValueExpr($2, $1, co)
expr := ast.NewValueExpr($2, cs, co)
tp := expr.GetType()
tp.SetCharset($1)
tp.SetCollate(co)
tp.AddFlag(mysql.UnderScoreCharsetFlag)
if tp.GetCollate() == charset.CollationBin {
tp.AddFlag(mysql.BinaryFlag)
}

val := expr.GetValue().(ast.BinaryLiteral).ToBytes()
encoding := charset.FindEncoding($1)
if !encoding.IsValid(val) {
yylex.AppendError(charset.ErrInvalidCharacterString.GenWithStackByArgs($1, charset.FormatInvalidChars(val)))
return 1
}
$$ = expr
}
| "UNDERSCORE_CHARSET" bitLit
{
co, err := charset.GetDefaultCollationLegacy($1)
cs := strings.ToLower($1)
co, err := charset.GetDefaultCollationLegacy(cs)
if err != nil {
yylex.AppendError(ast.ErrUnknownCharacterSet.GenWithStack("Unsupported character introducer: '%-.64s'", $1))
return 1
}
expr := ast.NewValueExpr($2, $1, co)
expr := ast.NewValueExpr($2, cs, co)
tp := expr.GetType()
tp.SetCharset($1)
tp.SetCollate(co)
tp.AddFlag(mysql.UnderScoreCharsetFlag)
if tp.GetCollate() == charset.CollationBin {
tp.AddFlag(mysql.BinaryFlag)
}

val := expr.GetValue().(ast.BinaryLiteral).ToBytes()
encoding := charset.FindEncoding($1)
if !encoding.IsValid(val) {
yylex.AppendError(charset.ErrInvalidCharacterString.GenWithStackByArgs($1, charset.FormatInvalidChars(val)))
return 1
}
$$ = expr
}

Expand Down
15 changes: 15 additions & 0 deletions pkg/parser/test_driver/test_driver_datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,11 @@ func (b BinaryLiteral) ToString() string {
return string(b)
}

// ToBytes exposes the literal as a plain []byte. The result is the literal
// itself converted to []byte, not a copy.
func (b BinaryLiteral) ToBytes() []byte {
	return []byte(b)
}

// ToBitLiteralString returns the bit literal representation for the literal.
func (b BinaryLiteral) ToBitLiteralString(trimLeadingZero bool) string {
if len(b) == 0 {
Expand Down Expand Up @@ -369,6 +374,11 @@ func (b BitLiteral) ToString() string {
return BinaryLiteral(b).ToString()
}

// ToBytes implements the ast.BinaryLiteral interface by converting the bit
// literal to a BinaryLiteral and returning that value's bytes.
func (b BitLiteral) ToBytes() []byte {
	lit := BinaryLiteral(b)
	return lit.ToBytes()
}

// ParseHexStr parses hexadecimal string literal.
// See https://dev.mysql.com/doc/refman/5.7/en/hexadecimal-literals.html
func ParseHexStr(s string) (BinaryLiteral, error) {
Expand Down Expand Up @@ -417,6 +427,11 @@ func (b HexLiteral) ToString() string {
return BinaryLiteral(b).ToString()
}

// ToBytes implements the ast.BinaryLiteral interface by converting the hex
// literal to a BinaryLiteral and returning that value's bytes.
func (b HexLiteral) ToBytes() []byte {
	lit := BinaryLiteral(b)
	return lit.ToBytes()
}

// SetBinChsClnFlag sets charset, collation as 'binary' and adds binaryFlag to FieldType.
func SetBinChsClnFlag(ft *types.FieldType) {
ft.SetCharset(charset.CharsetBin)
Expand Down
15 changes: 15 additions & 0 deletions pkg/types/binary_literal.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ func (b BinaryLiteral) ToString() string {
return string(b)
}

// ToBytes exposes the literal as a plain []byte. The result is the literal
// itself converted to []byte, not a copy.
func (b BinaryLiteral) ToBytes() []byte {
	return []byte(b)
}

// ToBitLiteralString returns the bit literal representation for the literal.
func (b BinaryLiteral) ToBitLiteralString(trimLeadingZero bool) string {
if len(b) == 0 {
Expand Down Expand Up @@ -186,6 +191,11 @@ func (b BitLiteral) ToString() string {
return BinaryLiteral(b).ToString()
}

// ToBytes implements the ast.BinaryLiteral interface by converting the bit
// literal to a BinaryLiteral and returning that value's bytes.
func (b BitLiteral) ToBytes() []byte {
	lit := BinaryLiteral(b)
	return lit.ToBytes()
}

// ParseHexStr parses hexadecimal string literal.
// See https://dev.mysql.com/doc/refman/5.7/en/hexadecimal-literals.html
func ParseHexStr(s string) (BinaryLiteral, error) {
Expand Down Expand Up @@ -233,3 +243,8 @@ func NewHexLiteral(s string) (HexLiteral, error) {
func (b HexLiteral) ToString() string {
return BinaryLiteral(b).ToString()
}

// ToBytes implements the ast.BinaryLiteral interface by converting the hex
// literal to a BinaryLiteral and returning that value's bytes.
func (b HexLiteral) ToBytes() []byte {
	lit := BinaryLiteral(b)
	return lit.ToBytes()
}

0 comments on commit 7d7790a

Please sign in to comment.