From e576e1ddcf7a3bfd0d2d221fab258551aa2b81a1 Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 22 Dec 2021 13:18:47 +0800 Subject: [PATCH 1/3] table: set the collation correctly in CastValue --- executor/write_test.go | 14 ++++++++++++++ table/column.go | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/executor/write_test.go b/executor/write_test.go index 11e402f446631..ec326d0b6d436 100644 --- a/executor/write_test.go +++ b/executor/write_test.go @@ -1096,6 +1096,20 @@ func TestReplace(t *testing.T) { tk.MustExec("drop table t1, t2") } +func TestReplaceWithCICollation(t *testing.T) { + collate.SetNewCollationEnabledForTest(true) + defer collate.SetNewCollationEnabledForTest(false) + store, clean := testkit.CreateMockStore(t) + defer clean() + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tk.MustExec("create table t (a varchar(20) charset utf8mb4 collate utf8mb4_general_ci primary key);") + tk.MustExec("replace into t(a) values (_binary'A '),(_binary'A');") + tk.MustQuery("select a from t use index(primary);").Check(testkit.Rows("A")) + tk.MustQuery("select a from t ignore index(primary);").Check(testkit.Rows("A")) +} + func TestGeneratedColumnForInsert(t *testing.T) { store, clean := testkit.CreateMockStore(t) defer clean() diff --git a/table/column.go b/table/column.go index d7e9a9ec5dadb..cdd296d8c1d0f 100644 --- a/table/column.go +++ b/table/column.go @@ -363,18 +363,18 @@ func validateStringDatum(ctx sessionctx.Context, origin, casted *types.Datum, co src := casted.GetBytes() encBytes, err := enc.Transform(nil, src, charset.OpDecode) if err != nil { - casted.SetBytesAsString(encBytes, charset.CollationUTF8MB4, 0) + casted.SetBytesAsString(encBytes, col.Collate, 0) nSrc := charset.CountValidBytesDecode(enc, src) return handleWrongCharsetValue(ctx, col, src, nSrc) } - casted.SetBytesAsString(encBytes, charset.CollationUTF8MB4, 0) + casted.SetBytesAsString(encBytes, col.Collate, 0) return nil } // Check if the string is valid in the given column charset. str := casted.GetBytes() if !charset.IsValid(enc, str) { replace, _ := enc.Transform(nil, str, charset.OpReplace) - casted.SetBytesAsString(replace, charset.CollationUTF8MB4, 0) + casted.SetBytesAsString(replace, col.Collate, 0) nSrc := charset.CountValidBytes(enc, str) return handleWrongCharsetValue(ctx, col, str, nSrc) } From 891ee5d8bb24d81a96b0196b0c04f472b89a48e1 Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 22 Dec 2021 15:18:12 +0800 Subject: [PATCH 2/3] add test in TestCastValue --- table/column_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/table/column_test.go b/table/column_test.go index 02cbb12237afc..9d95537727067 100644 --- a/table/column_test.go +++ b/table/column_test.go @@ -303,6 +303,18 @@ func TestCastValue(t *testing.T) { colInfoS.Charset = charset.CharsetASCII _, err = CastValue(ctx, types.NewDatum([]byte{0x32, 0xf0}), &colInfoS, false, true) require.NoError(t, err) + + colInfoS.Charset = charset.CharsetUTF8 + colInfoS.Collate = "utf8mb4_general_ci" + val, err = CastValue(ctx, types.NewBinaryLiteralDatum([]byte{0xE5, 0xA5, 0xBD}), &colInfoS, false, false) + require.NoError(t, err) + require.Equal(t, "utf8mb4_general_ci", val.Collation()) + val, err = CastValue(ctx, types.NewBinaryLiteralDatum([]byte{0xE5, 0xA5, 0xBD, 0x81}), &colInfoS, false, false) + require.Error(t, err, "[table:1366]Incorrect string value '\\x81' for column ''") + require.Equal(t, "utf8mb4_general_ci", val.Collation()) + val, err = CastValue(ctx, types.NewDatum([]byte{0xE5, 0xA5, 0xBD, 0x81}), &colInfoS, false, false) + require.Error(t, err, "[table:1366]Incorrect string value '\\x81' for column ''") + require.Equal(t, "utf8mb4_general_ci", val.Collation()) } func TestGetDefaultValue(t *testing.T) { From d7f021b63ebaeb3b155452f22c88e7cc03a2dc64 Mon Sep 17 00:00:00 2001 From: tangenta Date: Wed, 22 Dec 2021 15:22:22 +0800 Subject: [PATCH 3/3] match column info in TestCastValue --- table/column_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/table/column_test.go b/table/column_test.go index 9d95537727067..27e35f94757ba 100644 --- a/table/column_test.go +++ b/table/column_test.go @@ -304,7 +304,7 @@ func TestCastValue(t *testing.T) { _, err = CastValue(ctx, types.NewDatum([]byte{0x32, 0xf0}), &colInfoS, false, true) require.NoError(t, err) - colInfoS.Charset = charset.CharsetUTF8 + colInfoS.Charset = charset.CharsetUTF8MB4 colInfoS.Collate = "utf8mb4_general_ci" val, err = CastValue(ctx, types.NewBinaryLiteralDatum([]byte{0xE5, 0xA5, 0xBD}), &colInfoS, false, false) require.NoError(t, err)