Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#53126
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <[email protected]>
  • Loading branch information
YangKeao authored and ti-chi-bot committed May 11, 2024
1 parent a36abf3 commit ff42495
Showing 6 changed files with 3,290 additions and 1 deletion.
40 changes: 39 additions & 1 deletion expression/collation.go
Original file line number Diff line number Diff line change
@@ -319,7 +319,45 @@ func CheckAndDeriveCollationFromExprs(ctx sessionctx.Context, funcName string, e
return nil, illegalMixCollationErr(funcName, args)
}

return ec, nil
return fixStringTypeForMaxLength(funcName, args, ec), nil
}

// fixStringTypeForMaxLength adjusts the derived collation of a string function whose result, in MySQL, would
// change from `VARCHAR` to `MEDIUM BLOB` or `LONG BLOB` because of the max length of its arguments. TiDB's
// `FieldType` has no `MaxLength`, so the rule is written out by hand per function. For now only `JSON`
// arguments are considered, because in MySQL the `JSON` type has a big max length and leads to `LONG BLOB`
// in many situations. To learn more about this case, read the discussion under
// https://github.com/pingcap/tidb/issues/52833
//
// When a relevant argument is `JSON`, `ec.Collation` is replaced with the result of
// `collate.ConvertAndGetBinCollation`. `ec` is modified in place and returned.
//
// TODO: also consider types other than `JSON`, and think about when the result becomes `MEDIUM BLOB`. This
// function only handles the collation; it does not change the type or the binary flag.
// TODO: some functions generate big values, like `repeat` and `space`. They should be handled according to
// the argument if it's a constant.
func fixStringTypeForMaxLength(funcName string, args []Expression, ec *ExprCollation) *ExprCollation {
	// Be careful: `args` is not necessarily the full argument list of `funcName`. Check `deriveCollation` to
	// see which arguments are passed to `CheckAndDeriveCollationFromExprs`, and therefore end up here.
	isJSON := func(e Expression) bool {
		return e.GetType().EvalType() == types.ETJson
	}

	hasJSON := false
	switch funcName {
	case ast.Reverse, ast.Lower, ast.Upper, ast.SubstringIndex, ast.Trim, ast.Quote, ast.InsertFunc, ast.Substr, ast.Repeat, ast.Replace:
		// Only the first forwarded argument matters for these functions.
		hasJSON = isJSON(args[0])
	case ast.Concat, ast.ConcatWS, ast.Elt, ast.MakeSet:
		// Any forwarded argument being JSON is enough.
		for i := 0; i < len(args) && !hasJSON; i++ {
			hasJSON = isJSON(args[i])
		}
	case ast.ExportSet:
		// At most the first three forwarded arguments are inspected, and only when at least two are present.
		if limit := len(args); limit >= 2 {
			if limit > 3 {
				limit = 3
			}
			for i := 0; i < limit && !hasJSON; i++ {
				hasJSON = isJSON(args[i])
			}
		}
	}

	if !hasJSON {
		return ec
	}
	ec.Collation = collate.ConvertAndGetBinCollation(ec.Collation)
	return ec
}

func safeConvert(ctx sessionctx.Context, ec *ExprCollation, args ...Expression) bool {
113 changes: 113 additions & 0 deletions pkg/expression/builtin_ilike.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package expression

import (
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/collate"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/stringutil"
"github.com/pingcap/tipb/go-tipb"
)

// Compile-time assertions that the ILIKE types satisfy the functionClass and builtinFunc interfaces.
var (
	_ functionClass = &ilikeFunctionClass{}
	_ builtinFunc   = &builtinIlikeSig{}
)

// ilikeFunctionClass is the function class of ILIKE. Its getFunction method builds a builtinIlikeSig.
type ilikeFunctionClass struct {
baseFunctionClass
}

// getFunction verifies args and builds the builtinIlikeSig for them. The three arguments are evaluated as
// (string, string, int) and the result as an int whose field length is set to 1.
func (c *ilikeFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) {
	err := c.verifyArgs(args)
	if err != nil {
		return nil, err
	}

	bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, types.ETString, types.ETString, types.ETInt)
	if err != nil {
		return nil, err
	}
	bf.tp.SetFlen(1)

	ilikeSig := &builtinIlikeSig{baseBuiltinFunc: bf}
	ilikeSig.setPbCode(tipb.ScalarFuncSig_IlikeSig)
	return ilikeSig, nil
}

// builtinIlikeSig is the builtin function signature of ILIKE. It takes three arguments: the value, the
// pattern and the escape character (evaluated as an int).
type builtinIlikeSig struct {
baseBuiltinFunc
// patternCache is not serialized with builtinIlikeSig; it is treated as a cache that accelerates
// the evaluation of builtinIlikeSig. Clone does not copy it.
patternCache builtinFuncCache[collate.WildcardPattern]
}

// Clone returns a new builtinIlikeSig cloned from b's baseBuiltinFunc. The pattern cache is not carried
// over; the clone starts with an empty one.
func (b *builtinIlikeSig) Clone() builtinFunc {
	cloned := new(builtinIlikeSig)
	cloned.cloneFrom(&b.baseBuiltinFunc)
	return cloned
}

// evalInt evaluates ILIKE for a single row. It lowercases copies of the value and the pattern, compiles the
// pattern with the binary collator derived from b.collation, and returns 1 on a match and 0 otherwise.
// If any of the three arguments is NULL the result is NULL.
func (b *builtinIlikeSig) evalInt(ctx EvalContext, row chunk.Row) (int64, bool, error) {
	target, isNull, err := b.args[0].EvalString(ctx, row)
	if isNull || err != nil {
		return 0, isNull, err
	}

	rawPattern, isNull, err := b.args[1].EvalString(ctx, row)
	if isNull || err != nil {
		return 0, isNull, err
	}

	escape, isNull, err := b.args[2].EvalInt(ctx, row)
	if isNull || err != nil {
		return 0, isNull, err
	}

	// Work on private byte copies so the evaluated strings themselves are left untouched.
	targetBytes := []byte(target)
	patternBytes := []byte(rawPattern)

	stringutil.LowerOneString(targetBytes)
	if escapeByte := byte(escape); stringutil.IsUpperASCII(escapeByte) || stringutil.IsLowerASCII(escapeByte) {
		// An alphabetic escape character needs the dedicated helper, which also reports the escape
		// character that has to be used with the lowered pattern.
		escape = int64(stringutil.LowerOneStringExcludeEscapeChar(patternBytes, escapeByte))
	} else {
		stringutil.LowerOneString(patternBytes)
	}

	loweredTarget := string(targetBytes)
	loweredPattern := string(patternBytes)

	// compile builds a fresh wildcard pattern from the lowered pattern; it never fails.
	compile := func() (collate.WildcardPattern, error) {
		compiled := collate.ConvertAndGetBinCollator(b.collation).Pattern()
		compiled.Compile(loweredPattern, byte(escape))
		return compiled, nil
	}

	// Without a constant pattern and escape, the pattern must be rebuilt for every row.
	if b.args[1].ConstLevel() < ConstOnlyInContext || b.args[2].ConstLevel() < ConstOnlyInContext {
		// The error is ignored because compile always returns nil.
		matcher, _ := compile()
		return boolToInt64(matcher.DoMatch(loweredTarget)), false, nil
	}

	matcher, err := b.patternCache.getOrInitCache(ctx, compile)
	intest.AssertNoError(err)
	if err != nil {
		return 0, true, err
	}
	return boolToInt64(matcher.DoMatch(loweredTarget)), false, nil
}
209 changes: 209 additions & 0 deletions pkg/expression/builtin_ilike_vec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package expression

import (
"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/collate"
"github.com/pingcap/tidb/pkg/util/hack"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/stringutil"
)

// LowerAlphaASCII lowers the ASCII letters of the first rowNum strings of loweredCol.
// The strings are modified in place inside the column, so callers should pass a column they own.
func LowerAlphaASCII(loweredCol *chunk.Column, rowNum int) {
	for row := 0; row < rowNum; row++ {
		// hack.Slice exposes the string's bytes so that the lowering lands in the column itself.
		stringutil.LowerOneString(hack.Slice(loweredCol.GetString(row)))
	}
}

// LowerAlphaASCIIExcludeEscapeChar lowers the first rowNum strings of loweredCol in place using
// stringutil.LowerOneStringExcludeEscapeChar with excludedChar as the escape character.
// It returns the escape character reported by that helper for the last row, or excludedChar itself when
// rowNum is 0.
func LowerAlphaASCIIExcludeEscapeChar(loweredCol *chunk.Column, rowNum int, excludedChar int64) int64 {
	escapeByte := byte(excludedChar)
	effectiveEscape := excludedChar
	for row := 0; row < rowNum; row++ {
		rowBytes := hack.Slice(loweredCol.GetString(row))
		effectiveEscape = int64(stringutil.LowerOneStringExcludeEscapeChar(rowBytes, escapeByte))
	}
	return effectiveEscape
}

// vectorized always returns true; the vectorized evaluation of builtinIlikeSig is vecEvalInt.
func (b *builtinIlikeSig) vectorized() bool {
return true
}

// canMemorize reports whether param carries no column, i.e. its value is taken from the
// param's single string value rather than from per-row data.
func (b *builtinIlikeSig) canMemorize(param *funcParam) bool {
	col := param.getCol()
	return col == nil
}

// tryToVecMemorize returns the compiled pattern from b.patternCache, compiling and storing it on first
// use, when param can be memorized (see canMemorize). The boolean result is false when the param cannot
// be memorized or the cache reports an error; the returned pattern is nil in that case.
func (b *builtinIlikeSig) tryToVecMemorize(ctx EvalContext, param *funcParam, escape int64) (collate.WildcardPattern, bool) {
	if !b.canMemorize(param) {
		return nil, false
	}

	// compile builds the pattern from the param's string value; it never fails.
	compile := func() (collate.WildcardPattern, error) {
		compiled := collate.ConvertAndGetBinCollator(b.collation).Pattern()
		compiled.Compile(param.getStringVal(0), byte(escape))
		return compiled, nil
	}

	cached, err := b.patternCache.getOrInitCache(ctx, compile)
	intest.AssertNoError(err)
	if err == nil {
		return cached, true
	}
	return nil, false
}

// getEscape evaluates the escape argument (args[2]), which must be a strict constant.
// It returns the escape value, a flag telling the caller to stop, and an error. The flag is true when the
// argument is not a strict constant (with an error), when it is NULL (result is then filled with NULLs and
// no error is returned), or when its evaluation fails.
func (b *builtinIlikeSig) getEscape(ctx EvalContext, input *chunk.Chunk, result *chunk.Column) (int64, bool, error) {
	rowNum := input.NumRows()

	if b.args[2].ConstLevel() != ConstStrict {
		// The default escape character is reported alongside the error.
		return int64('\\'), true, errors.Errorf("escape should be const")
	}

	escapeVal, isNull, err := b.args[2].EvalInt(ctx, chunk.Row{})
	// NULL takes precedence over an evaluation error.
	if isNull {
		fillNullStringIntoResult(result, rowNum)
		return escapeVal, true, nil
	}
	if err != nil {
		return escapeVal, true, err
	}
	return escapeVal, false, nil
}

// lowerExpr lowers the ASCII letters of the expression param so that matching is case-insensitive.
//
// When the param has no column, its single string value is lowered and stored back. The lowering is done on
// a private copy: the string returned by getStringVal may share its backing bytes with whatever produced
// it, and Go strings must not be mutated in place. When the param has a column, a copy of the column is
// lowered and installed in the param, leaving the original column untouched.
func (b *builtinIlikeSig) lowerExpr(param *funcParam, rowNum int) {
	col := param.getCol()
	if col == nil {
		lowered := []byte(param.getStringVal(0))
		stringutil.LowerOneString(lowered)
		param.setStrVal(string(lowered))
		return
	}

	tmpExprCol := col.CopyConstruct(nil)
	LowerAlphaASCII(tmpExprCol, rowNum)
	param.setCol(tmpExprCol)
}

// lowerPattern lowers the pattern param while treating escape as the escape character, and returns the
// escape character that must be used with the lowered pattern (as reported by
// stringutil.LowerOneStringExcludeEscapeChar).
//
// When the param has no column, its single string value is lowered and stored back. The lowering is done on
// a private copy: the string returned by getStringVal may share its backing bytes with whatever produced
// it, and Go strings must not be mutated in place. When the param has a column, a copy of the column is
// lowered and installed in the param, leaving the original column untouched.
func (b *builtinIlikeSig) lowerPattern(param *funcParam, rowNum int, escape int64) int64 {
	col := param.getCol()
	if col == nil {
		lowered := []byte(param.getStringVal(0))
		escape = int64(stringutil.LowerOneStringExcludeEscapeChar(lowered, byte(escape)))
		param.setStrVal(string(lowered))
		return escape
	}

	tmpPatternCol := col.CopyConstruct(nil)
	escape = LowerAlphaASCIIExcludeEscapeChar(tmpPatternCol, rowNum, escape)
	param.setCol(tmpPatternCol)

	return escape
}

// vecVec evaluates ILIKE when both the expression (params[0]) and the pattern (params[1]) are read per
// row. The null information of both columns is merged into result, and for every non-null row the
// pattern is recompiled and matched against that row's expression value.
func (b *builtinIlikeSig) vecVec(pattern collate.WildcardPattern, params []*funcParam, rowNum int, escape int64, result *chunk.Column) error {
	exprParam, patternParam := params[0], params[1]

	result.ResizeInt64(rowNum, false)
	result.MergeNulls(exprParam.getCol(), patternParam.getCol())
	matches := result.Int64s()
	for row := 0; row < rowNum; row++ {
		if !result.IsNull(row) {
			pattern.Compile(patternParam.getStringVal(row), byte(escape))
			matches[row] = boolToInt64(pattern.DoMatch(exprParam.getStringVal(row)))
		}
	}
	return nil
}

// constVec evaluates ILIKE when the expression is the single string expr and the pattern (param) is read
// per row. The null information of the pattern column is merged into result, and for every non-null row
// the pattern is recompiled and matched against expr.
func (b *builtinIlikeSig) constVec(pattern collate.WildcardPattern, expr string, param *funcParam, rowNum int, escape int64, result *chunk.Column) error {
	result.ResizeInt64(rowNum, false)
	result.MergeNulls(param.getCol())
	matches := result.Int64s()
	for row := 0; row < rowNum; row++ {
		if !result.IsNull(row) {
			pattern.Compile(param.getStringVal(row), byte(escape))
			matches[row] = boolToInt64(pattern.DoMatch(expr))
		}
	}
	return nil
}

// ilikeWithMemorization evaluates ILIKE with an already compiled pattern. The null information of the
// expression column is merged into result, and every non-null row's expression value is matched against
// pattern without recompiling it.
func (b *builtinIlikeSig) ilikeWithMemorization(pattern collate.WildcardPattern, exprParam *funcParam, rowNum int, result *chunk.Column) error {
	result.ResizeInt64(rowNum, false)
	result.MergeNulls(exprParam.getCol())
	matches := result.Int64s()
	for row := 0; row < rowNum; row++ {
		if !result.IsNull(row) {
			matches[row] = boolToInt64(pattern.DoMatch(exprParam.getStringVal(row)))
		}
	}
	return nil
}

// ilikeWithoutMemorization evaluates ILIKE when the pattern has to be compiled per row. It dispatches to
// vecVec when the expression param (params[0]) has a column, and to constVec with the expression's single
// string value otherwise.
func (b *builtinIlikeSig) ilikeWithoutMemorization(pattern collate.WildcardPattern, params []*funcParam, rowNum int, escape int64, result *chunk.Column) error {
	exprParam := params[0]
	if exprParam.getCol() != nil {
		return b.vecVec(pattern, params, rowNum, escape, result)
	}
	return b.constVec(pattern, exprParam.getStringVal(0), params[1], rowNum, escape, result)
}

// vecEvalInt is the vectorized evaluation of ILIKE over input; it writes 1/0/NULL per row into result.
//
// Steps:
//  1. Build the string params for the expression (args[0]) and the pattern (args[1]). If either is a
//     constant NULL, result is filled with NULLs.
//  2. Evaluate the escape argument through getEscape, which stops the evaluation on error or NULL.
//  3. Lower the expression and the pattern so that matching is case-insensitive.
//  4. Match with the memorized pattern when the pattern param can be memorized, and compile the pattern
//     per row otherwise.
func (b *builtinIlikeSig) vecEvalInt(ctx EvalContext, input *chunk.Chunk, result *chunk.Column) error {
	rowNum := input.NumRows()
	params := make([]*funcParam, 0, 3)
	// The arguments of a deferred call are evaluated when the `defer` statement executes. Passing `params`
	// directly would capture the still-empty slice, so none of the params appended below would be
	// released. Deferring a closure makes releaseBuffers see the slice as it is at return time.
	defer func() {
		releaseBuffers(&b.baseBuiltinFunc, params)
	}()

	for i := 0; i < 2; i++ {
		param, isConstNull, err := buildStringParam(ctx, &b.baseBuiltinFunc, i, input, false)
		if err != nil {
			return ErrRegexp.GenWithStackByArgs(err)
		}
		if isConstNull {
			fillNullStringIntoResult(result, rowNum)
			return nil
		}
		params = append(params, param)
	}

	escape, ret, err := b.getEscape(ctx, input, result)
	if err != nil || ret {
		return err
	}

	b.lowerExpr(params[0], rowNum)
	// Lowering the pattern may change the escape character that has to be used with it.
	escape = b.lowerPattern(params[1], rowNum, escape)

	pattern, ok := b.tryToVecMemorize(ctx, params[1], escape)
	if !ok {
		pattern = collate.ConvertAndGetBinCollator(b.collation).Pattern()
		return b.ilikeWithoutMemorization(pattern, params, rowNum, escape, result)
	}

	return b.ilikeWithMemorization(pattern, params[0], rowNum, result)
}
Loading

0 comments on commit ff42495

Please sign in to comment.