From 70872b97c5d722ac1e1f8749bdc50a08bcf32917 Mon Sep 17 00:00:00 2001 From: Yang Keao Date: Fri, 10 May 2024 13:09:17 +0800 Subject: [PATCH] fix charset conversion warning/error Signed-off-by: Yang Keao --- pkg/expression/builtin.go | 4 +- pkg/expression/builtin_cast.go | 2 +- pkg/expression/builtin_cast_test.go | 11 +++-- pkg/expression/builtin_convert_charset.go | 46 +++++++++++++++---- pkg/expression/distsql_builtin.go | 3 +- pkg/expression/scalar_function.go | 2 +- .../r/expression/charset_and_collation.result | 12 +++++ .../t/expression/charset_and_collation.test | 8 ++++ 8 files changed, 71 insertions(+), 17 deletions(-) diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index d4f1e2c71f169..5db08b06cb151 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -193,7 +193,7 @@ func newBaseBuiltinFuncWithTp(ctx BuildContext, funcName string, args []Expressi args[i] = WrapWithCastAsDecimal(ctx, args[i]) case types.ETString: args[i] = WrapWithCastAsString(ctx, args[i]) - args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName) + args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false) case types.ETDatetime: args[i] = WrapWithCastAsTime(ctx, args[i], types.NewFieldType(mysql.TypeDatetime)) case types.ETTimestamp: @@ -253,7 +253,7 @@ func newBaseBuiltinFuncWithFieldTypes(ctx BuildContext, funcName string, args [] args[i] = WrapWithCastAsReal(ctx, args[i]) case types.ETString: args[i] = WrapWithCastAsString(ctx, args[i]) - args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName) + args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false) case types.ETJson: args[i] = WrapWithCastAsJSON(ctx, args[i]) // https://github.com/pingcap/tidb/issues/44196 diff --git a/pkg/expression/builtin_cast.go b/pkg/expression/builtin_cast.go index f04e5b096b9a6..b406a0f938ea2 100644 --- a/pkg/expression/builtin_cast.go +++ b/pkg/expression/builtin_cast.go @@ -328,7 +328,7 @@ func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Express case types.ETString: // When cast from binary to some other charsets, we should check if the binary is valid or not. // so we build a from_binary function to do this check. - bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName) + bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName, true) sig = &builtinCastStringAsStringSig{bf} sig.setPbCode(tipb.ScalarFuncSig_CastStringAsString) default: diff --git a/pkg/expression/builtin_cast_test.go b/pkg/expression/builtin_cast_test.go index 97746afa52b07..4f106a5837961 100644 --- a/pkg/expression/builtin_cast_test.go +++ b/pkg/expression/builtin_cast_test.go @@ -1393,7 +1393,7 @@ func TestWrapWithCastAsString(t *testing.T) { cases := []struct { expr Expression - err bool + warn bool ret string }{ { @@ -1432,11 +1432,14 @@ func TestWrapWithCastAsString(t *testing.T) { "-127", }, } + lastWarningLen := 0 for _, c := range cases { expr := BuildCastFunction(ctx, c.expr, types.NewFieldType(mysql.TypeVarString)) - res, _, err := expr.EvalString(ctx, chunk.Row{}) - if c.err { - require.Error(t, err) + res, _, _ := expr.EvalString(ctx, chunk.Row{}) + if c.warn { + warns := ctx.GetSessionVars().StmtCtx.GetWarnings() + require.Greater(t, len(warns), lastWarningLen) + lastWarningLen = len(warns) } else { require.Equal(t, c.ret, res) } diff --git a/pkg/expression/builtin_convert_charset.go b/pkg/expression/builtin_convert_charset.go index 234dbb3674358..714a706a4ffd6 100644 --- a/pkg/expression/builtin_convert_charset.go +++ b/pkg/expression/builtin_convert_charset.go @@ -134,7 +134,8 @@ func (b *builtinInternalToBinarySig) vecEvalString(ctx EvalContext, input *chunk type tidbFromBinaryFunctionClass struct { baseFunctionClass - tp *types.FieldType + tp *types.FieldType + cannotConvertStringAsWarning bool } func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { @@ -150,7 +151,7 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre return nil, err } bf.tp = c.tp - sig = &builtinInternalFromBinarySig{bf} + sig = &builtinInternalFromBinarySig{bf, c.cannotConvertStringAsWarning} sig.setPbCode(tipb.ScalarFuncSig_FromBinary) default: return nil, fmt.Errorf("unexpected argTp: %d", argTp) @@ -160,6 +161,9 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre type builtinInternalFromBinarySig struct { baseBuiltinFunc + + // TODO: also pass this field when pushing down this function. The behavior of TiDB and TiKV is different on this function now. + cannotConvertStringAsWarning bool } func (b *builtinInternalFromBinarySig) Clone() builtinFunc { @@ -179,8 +183,20 @@ func (b *builtinInternalFromBinarySig) evalString(ctx EvalContext, row chunk.Row if err != nil { strHex := formatInvalidChars(valBytes) err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset()) + + if b.cannotConvertStringAsWarning { + tc := typeCtx(ctx) + tc.AppendWarning(err) + if sqlMode(ctx).HasStrictMode() { + return "", true, nil + } else { + return string(ret), false, nil + } + } else { + return "", false, err + } } - return string(ret), false, err + return string(ret), false, nil } func (b *builtinInternalFromBinarySig) vectorized() bool { @@ -209,7 +225,21 @@ func (b *builtinInternalFromBinarySig) vecEvalString(ctx EvalContext, input *chu val, err := enc.Transform(encodedBuf, str, charset.OpDecode) if err != nil { strHex := formatInvalidChars(str) - return errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset()) + err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset()) + + if b.cannotConvertStringAsWarning { + tc := typeCtx(ctx) + tc.AppendWarning(err) + if sqlMode(ctx).HasStrictMode() { + result.AppendNull() + continue + } else { + result.AppendBytes(str) + continue + } + } else { + return err + } } result.AppendBytes(val) } @@ -232,8 +262,8 @@ func BuildToBinaryFunction(ctx BuildContext, expr Expression) (res Expression) { } // BuildFromBinaryFunction builds from_binary function. -func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression) { - fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp} +func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType, cannotConvertStringAsWarning bool) (res Expression) { + fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp, cannotConvertStringAsWarning} f, err := fc.getFunction(ctx, []Expression{expr}) if err != nil { return expr @@ -305,7 +335,7 @@ func init() { } // HandleBinaryLiteral wraps `expr` with to_binary or from_binary sig. -func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string) Expression { +func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string, explicitCast bool) Expression { argChs, dstChs := expr.GetType().GetCharset(), ec.Charset switch convertFuncsMap[funcName] { case funcPropNone: @@ -326,7 +356,7 @@ func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, f ft := expr.GetType().Clone() ft.SetCharset(ec.Charset) ft.SetCollate(ec.Collation) - return BuildFromBinaryFunction(ctx, expr, ft) + return BuildFromBinaryFunction(ctx, expr, ft, explicitCast) } } return expr diff --git a/pkg/expression/distsql_builtin.go b/pkg/expression/distsql_builtin.go index cdb9d382607a5..0012410a60c80 100644 --- a/pkg/expression/distsql_builtin.go +++ b/pkg/expression/distsql_builtin.go @@ -1074,7 +1074,8 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie case tipb.ScalarFuncSig_ToBinary: f = &builtinInternalToBinarySig{base} case tipb.ScalarFuncSig_FromBinary: - f = &builtinInternalFromBinarySig{base} + // TODO: set the `cannotConvertStringAsWarning` accordingly + f = &builtinInternalFromBinarySig{base, false} default: e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode) diff --git a/pkg/expression/scalar_function.go b/pkg/expression/scalar_function.go index fcef118dc5002..a325842dd4993 100644 --- a/pkg/expression/scalar_function.go +++ b/pkg/expression/scalar_function.go @@ -201,7 +201,7 @@ func newFunctionImpl(ctx BuildContext, fold int, funcName string, retType *types case ast.GetVar: return BuildGetVarFunction(ctx, args[0], retType) case InternalFuncFromBinary: - return BuildFromBinaryFunction(ctx, args[0], retType), nil + return BuildFromBinaryFunction(ctx, args[0], retType, false), nil case InternalFuncToBinary: return BuildToBinaryFunction(ctx, args[0]), nil case ast.Sysdate: diff --git a/tests/integrationtest/r/expression/charset_and_collation.result b/tests/integrationtest/r/expression/charset_and_collation.result index 4e792011683ba..71815729c1084 100644 --- a/tests/integrationtest/r/expression/charset_and_collation.result +++ b/tests/integrationtest/r/expression/charset_and_collation.result @@ -1991,3 +1991,15 @@ LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci) select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci; 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci 1 +select cast(compress('b') as char); +cast(compress('b') as char) +NULL +Level Code Message +Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4 +set sql_mode=''; +select cast(compress('b') as char); +cast(compress('b') as char) +x +Level Code Message +Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4 +set sql_mode=DEFAULT; diff --git a/tests/integrationtest/t/expression/charset_and_collation.test b/tests/integrationtest/t/expression/charset_and_collation.test index ef5ecffab1b88..3fb58a20a2691 100644 --- a/tests/integrationtest/t/expression/charset_and_collation.test +++ b/tests/integrationtest/t/expression/charset_and_collation.test @@ -814,3 +814,11 @@ select min(id) from t group by str order by str; # TestUTF8MB40900AICIStrFunc select LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci); select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci; + +# TestConvertCharsetFromBinary +--enable_warnings +select cast(compress('b') as char); +set sql_mode=''; +select cast(compress('b') as char); +--disable_warnings +set sql_mode=DEFAULT;