Skip to content

Commit

Permalink
sql: enable storage for tsvector/tsquery
Browse files Browse the repository at this point in the history
This commit adds the ability to store tsvector and tsquery data in
ordinary, unindexed columns.

This functionality is gated behind the 23.1 version.

Release note (sql change): permit non-indexed storage of tsvector and
tsquery datatypes
  • Loading branch information
jordanlewis committed Dec 12, 2022
1 parent aae18c7 commit 274ad78
Show file tree
Hide file tree
Showing 10 changed files with 138 additions and 29 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/alter_column_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func AlterColumnType(
}
}

err = colinfo.ValidateColumnDefType(typ)
err = colinfo.ValidateColumnDefType(ctx, params.EvalContext().Settings.Version, typ)
if err != nil {
return err
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/catalog/colinfo/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ go_library(
importpath = "github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo",
visibility = ["//visibility:public"],
deps = [
"//pkg/clusterversion",
"//pkg/sql/catalog",
"//pkg/sql/catalog/catpb",
"//pkg/sql/catalog/descpb",
Expand Down
12 changes: 10 additions & 2 deletions pkg/sql/catalog/colinfo/col_type_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
package colinfo

import (
"context"
"fmt"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
Expand Down Expand Up @@ -68,7 +70,7 @@ func (ti ColTypeInfo) Type(idx int) *types.T {

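// ValidateColumnDefType returns an error if the type of a column definition is
// not valid. It is checked when a column is created or altered. The cluster
// version is consulted because the TSQuery and TSVector families are only
// accepted once version 23.1 is active.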
func ValidateColumnDefType(t *types.T) error {
func ValidateColumnDefType(ctx context.Context, version clusterversion.Handle, t *types.T) error {
switch t.Family() {
case types.StringFamily, types.CollatedStringFamily:
if t.Family() == types.CollatedStringFamily {
Expand Down Expand Up @@ -100,7 +102,7 @@ func ValidateColumnDefType(t *types.T) error {
if err := types.CheckArrayElementType(t.ArrayContents()); err != nil {
return err
}
return ValidateColumnDefType(t.ArrayContents())
return ValidateColumnDefType(ctx, version, t.ArrayContents())

case types.BitFamily, types.IntFamily, types.FloatFamily, types.BoolFamily, types.BytesFamily, types.DateFamily,
types.INetFamily, types.IntervalFamily, types.JsonFamily, types.OidFamily, types.TimeFamily,
Expand All @@ -116,6 +118,12 @@ func ValidateColumnDefType(t *types.T) error {
return unimplemented.NewWithIssue(70099, "cannot use table record type as table column")
}

case types.TSQueryFamily, types.TSVectorFamily:
if !version.IsActive(ctx, clusterversion.V23_1) {
return pgerror.Newf(pgcode.FeatureNotSupported,
"TSVector/TSQuery not supported until version 23.1")
}

default:
return pgerror.Newf(pgcode.InvalidTableDefinition,
"value type %s cannot be used for table columns", t.String())
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/catalog/tabledesc/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func MakeColumnDefDescs(
if err != nil {
return nil, err
}
if err = colinfo.ValidateColumnDefType(resType); err != nil {
if err = colinfo.ValidateColumnDefType(ctx, evalCtx.Settings.Version, resType); err != nil {
return nil, err
}
col.Type = resType
Expand Down
106 changes: 91 additions & 15 deletions pkg/sql/logictest/testdata/logic_test/tsvector
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,103 @@ SELECT 'foo:1,2 bar:3'::tsvector @@ 'foo <-> bar'::tsquery, 'foo <-> bar'::tsque
----
true true

statement error cannot be used for table columns
CREATE TABLE a (v tsvector)

statement error cannot be used for table columns
CREATE TABLE a (q tsquery)

# Uncomment once tsvector and tsquery are usable as table columns.
#
# statement ok
# INSERT INTO a VALUES('foo:1,2 bar:4B'::tsvector, 'foo <2> bar'::tsquery)
#
# query TT
# SELECT * FROM a
# ----
# 'bar':4B 'foo':1,2 'foo' <2> 'bar'
statement ok
CREATE TABLE a (v tsvector, q tsquery)

statement ok
INSERT INTO a VALUES('foo:1,2 bar:4B'::tsvector, 'foo <2> bar'::tsquery)

query TT
SELECT * FROM a
----
'bar':4B 'foo':1,2 'foo' <2> 'bar'

query BB
SELECT 'foo:1,2 bar:4B'::tsvector @@ 'foo <2> bar'::tsquery, 'foo:1,2 bar:4B' @@ 'foo <-> bar'::tsquery
----
true false

query BB
SELECT v @@ 'foo <2> bar'::tsquery, v @@ 'foo <-> bar'::tsquery FROM a
----
true false

query B
SELECT v @@ q FROM a
----
true

# Test column modifiers.

statement ok
CREATE TABLE b (a INT PRIMARY KEY DEFAULT 1, v tsvector DEFAULT 'foo:1' ON UPDATE 'bar:2', v2 tsvector AS (v) STORED, v3 tsvector AS (v) VIRTUAL)

statement ok
CREATE TABLE c (a INT PRIMARY KEY DEFAULT 1, q tsquery DEFAULT 'foo' ON UPDATE 'bar', q2 tsquery AS (q) STORED, q3 tsquery AS (q) VIRTUAL)

statement ok
INSERT INTO b DEFAULT VALUES

statement ok
INSERT INTO c DEFAULT VALUES

query ITTT
SELECT * FROM b
----
1 'foo':1 'foo':1 'foo':1

query ITTT
SELECT * FROM c
----
1 'foo' 'foo' 'foo'

statement ok
UPDATE b SET a = 2 WHERE a = 1

statement ok
UPDATE c SET a = 2 WHERE a = 1

query ITTT
SELECT * FROM b
----
2 'bar':2 'bar':2 'bar':2

query ITTT
SELECT * FROM c
----
2 'bar' 'bar' 'bar'

statement ok
INSERT INTO b VALUES (3, 'foo:1,5 zoop:3')

statement error can't order by column type TSVECTOR
SELECT * FROM b ORDER BY v

statement error can't order by column type TSQUERY
SELECT * FROM c ORDER BY q

statement error arrays of tsvector not allowed
CREATE TABLE tsarray(a tsvector[])

statement error arrays of tsquery not allowed
CREATE TABLE tsarray(a tsquery[])

statement error unsupported comparison operator
SELECT a, v FROM b WHERE v > 'bar:2'::tsvector

statement error unsupported comparison operator
SELECT a, q FROM c WHERE q > 'abc'::tsquery

query IT
SELECT a, v FROM b WHERE v = 'bar:2'::tsvector
----
2 'bar':2

query IT
SELECT a, q FROM c WHERE q = 'bar'::tsquery
----
2 'bar'

# Ensure truncation of long position lists.
query T
SELECT ('foo:' || string_agg(g::TEXT,','))::tsvector from generate_series(1,280) g(g);
Expand Down
9 changes: 7 additions & 2 deletions pkg/sql/opt/optbuilder/orderby.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,13 @@ func (b *Builder) analyzeExtraArgument(

func ensureColumnOrderable(e tree.TypedExpr) {
typ := e.ResolvedType()
if typ.Family() == types.JsonFamily ||
(typ.Family() == types.ArrayFamily && typ.ArrayContents().Family() == types.JsonFamily) {
if typ.Family() == types.ArrayFamily {
typ = typ.ArrayContents()
}
switch typ.Family() {
case types.JsonFamily:
panic(unimplementedWithIssueDetailf(35706, "", "can't order by column type jsonb"))
case types.TSQueryFamily, types.TSVectorFamily:
panic(unimplementedWithIssueDetailf(92165, "", "can't order by column type %s", typ.SQLString()))
}
}
1 change: 1 addition & 0 deletions pkg/sql/randgen/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ go_library(
"//pkg/geo/geopb",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/settings/cluster",
"//pkg/sql/catalog",
"//pkg/sql/catalog/colinfo",
"//pkg/sql/catalog/descpb",
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/randgen/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@
/*
Package randgen provides utility functions for generating random syntax trees,
datums, encoded datums, types, and more. It is useful in randomized tests.
It should not be used outside of a testing context.
*/
package randgen
14 changes: 11 additions & 3 deletions pkg/sql/randgen/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
package randgen

import (
"context"
"math/rand"
"sort"

clustersettings "github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/valueside"
"github.com/cockroachdb/cockroach/pkg/sql/types"
Expand Down Expand Up @@ -152,15 +154,19 @@ func IsLegalColumnType(typ *types.T) bool {
// user-created tables.
return false
}
return colinfo.ValidateColumnDefType(typ) == nil
ctx := context.Background()
version := clustersettings.MakeTestingClusterSettings().Version
return colinfo.ValidateColumnDefType(ctx, version, typ) == nil
}

// RandArrayType generates a random array type.
func RandArrayType(rng *rand.Rand) *types.T {
ctx := context.Background()
version := clustersettings.MakeTestingClusterSettings().Version
for {
typ := RandColumnType(rng)
resTyp := types.MakeArray(typ)
if err := colinfo.ValidateColumnDefType(resTyp); err == nil {
if err := colinfo.ValidateColumnDefType(ctx, version, resTyp); err == nil {
return resTyp
}
}
Expand Down Expand Up @@ -247,11 +253,13 @@ func RandEncodableType(rng *rand.Rand) *types.T {
// TODO(andyk): Remove this workaround once #36736 is resolved. Replace calls to
// it with calls to RandColumnTypes.
func RandEncodableColumnTypes(rng *rand.Rand, numCols int) []*types.T {
ctx := context.Background()
version := clustersettings.MakeTestingClusterSettings().Version
types := make([]*types.T, numCols)
for i := range types {
for {
types[i] = RandEncodableType(rng)
if err := colinfo.ValidateColumnDefType(types[i]); err == nil {
if err := colinfo.ValidateColumnDefType(ctx, version, types[i]); err == nil {
break
}
}
Expand Down
18 changes: 13 additions & 5 deletions pkg/sql/virtual_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,14 @@ func (t virtualSchemaTable) initVirtualTableDesc(
privilege.List{},
username.NodeUserName(),
),
nil, /* affected */
&semaCtx, /* semaCtx */
nil, /* evalCtx */
&sessiondata.SessionData{}, /* sessionData */
nil, /* affected */
&semaCtx, /* semaCtx */
// We explicitly pass in a half-baked EvalContext because we don't need to
// evaluate any expressions to initialize virtual tables. We do need to
// pass in the cluster settings to make sure that functions can properly
// evaluate version gates, though.
&eval.Context{Settings: st}, /* evalCtx */
&sessiondata.SessionData{}, /* sessionData */
tree.PersistencePermanent,
)
if err != nil {
Expand Down Expand Up @@ -323,7 +327,11 @@ func (v virtualSchemaView) initVirtualTableDesc(
username.NodeUserName(),
),
nil, // semaCtx
nil, // evalCtx
// We explicitly pass in a half-baked EvalContext because we don't need to
// evaluate any expressions to initialize virtual tables. We do need to
// pass in the cluster settings to make sure that functions can properly
// evaluate version gates, though.
&eval.Context{Settings: st}, /* evalCtx */
st,
tree.PersistencePermanent,
false, // isMultiRegion
Expand Down

0 comments on commit 274ad78

Please sign in to comment.