Skip to content

Commit

Permalink
feat(chsql): add IsSingleToken to check whether string is a token
Browse files Browse the repository at this point in the history
  • Loading branch information
tdakkota committed Jun 20, 2024
1 parent cef5f8f commit 3fcc6d7
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
25 changes: 25 additions & 0 deletions internal/chstorage/chsql/token.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package chsql

// IsSingleToken reports whether s is a single token: it must be non-empty and
// must not contain any ASCII byte other than a letter or a decimal digit.
//
// Bytes with the high bit set (>= 0x80), such as the bytes of multi-byte UTF-8
// sequences, are always accepted as token characters.
//
// See https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions#hastoken.
// See https://github.com/ClickHouse/ClickHouse/blob/755b73f3fc99847f40ac4d9186bb19116e709c37/src/Interpreters/ITokenExtractor.cpp#L84.
func IsSingleToken[S ~string | ~[]byte](s S) bool {
	if len(s) == 0 {
		return false
	}
	// Index s directly: both string and []byte yield bytes, so no conversion
	// (and no reliance on the compiler eliding a copy) is needed.
	for i := 0; i < len(s); i++ {
		// Any ASCII byte that is not alphanumeric acts as a separator,
		// which means s is not a single token.
		if c := s[i]; c < 0x80 && !isAlphaNumeric(c) {
			return false
		}
	}
	return true
}

// isAlphaNumeric reports whether c is an ASCII letter (a-z, A-Z) or an ASCII
// decimal digit (0-9).
func isAlphaNumeric(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	default:
		return false
	}
}
27 changes: 27 additions & 0 deletions internal/chstorage/chsql/token_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package chsql

import (
"fmt"
"testing"

"github.com/stretchr/testify/require"
)

func TestIsSingleToken(t *testing.T) {
tests := []struct {
s string
want bool
}{
{``, false},
{`10`, true},
{`abc`, true},
{`помидоры`, true},
{`abc 10`, false},
}
for i, tt := range tests {
tt := tt
t.Run(fmt.Sprintf("Test%d", i+1), func(t *testing.T) {
require.Equal(t, tt.want, IsSingleToken(tt.s))
})
}
}

0 comments on commit 3fcc6d7

Please sign in to comment.