Merge pull request #1034 from ClickHouse/json_string_column
feat: json string column
Showing 11 changed files with 330 additions and 0 deletions.
@@ -0,0 +1,3 @@
00000000 34 39 ad b3 8d 96 d2 87 bb 3b aa 1e 3f 4b 64 f5 |49.......;..?Kd.|
00000010 82 1d 00 00 00 af 00 00 00 8f 48 65 6c 6c 6f 21 |..........Hello!|
00000020 0a 48 07 00 8d 70 48 65 6c 6c 6f 21 0a |.H...pHello!.|
@@ -0,0 +1,6 @@
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
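These golden bytes line up with what ColJSONStr.EncodeColumn (shown in the new column code further down) writes: a little-endian uint64 serialization version, then the plain String encoding of each row, i.e. a UVarInt length (0x11, 0x1b, 0x21 here) followed by the raw JSON bytes. A small standalone sketch that hand-decodes the same payload, assuming that layout; the byte literal simply re-types the dump above:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

func main() {
	// Version 1 as 8 little-endian bytes, then three length-prefixed rows.
	buf := []byte("\x01\x00\x00\x00\x00\x00\x00\x00" +
		"\x11{\"x\": 5, \"y\": 10}" +
		"\x1b{\"a\": \"test\", \"b\": \"test2\"}" +
		"\x21{\"a\": \"obj test\", \"b\": {\"c\": 20}}")

	r := bytes.NewReader(buf)

	var version uint64
	if err := binary.Read(r, binary.LittleEndian, &version); err != nil {
		panic(err)
	}
	fmt.Println("serialization version:", version) // 1

	for r.Len() > 0 {
		n, err := binary.ReadUvarint(r) // row length as UVarInt
		if err != nil {
			panic(err)
		}
		row := make([]byte, n)
		if _, err := io.ReadFull(r, row); err != nil {
			panic(err)
		}
		fmt.Printf("row: %s\n", row)
	}
}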
@@ -0,0 +1,6 @@
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
@@ -0,0 +1,6 @@
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
@@ -0,0 +1,158 @@
package proto

import (
	"github.com/go-faster/errors"
)

const JSONStringSerializationVersion uint64 = 1

// ColJSONStr represents a JSON column serialized as String.
//
// Use ColJSONBytes for a ColumnOf[[]byte] implementation.
type ColJSONStr struct {
	str ColStr
}

// Append appends a string to the column.
func (c *ColJSONStr) Append(v string) {
	c.str.Append(v)
}

// AppendBytes appends a byte slice as a string to the column.
func (c *ColJSONStr) AppendBytes(v []byte) {
	c.str.AppendBytes(v)
}

// AppendArr appends a slice of strings to the column.
func (c *ColJSONStr) AppendArr(v []string) {
	c.str.AppendArr(v)
}

// Compile-time assertions for ColJSONStr.
var (
	_ ColInput          = ColJSONStr{}
	_ ColResult         = (*ColJSONStr)(nil)
	_ Column            = (*ColJSONStr)(nil)
	_ ColumnOf[string]  = (*ColJSONStr)(nil)
	_ Arrayable[string] = (*ColJSONStr)(nil)
)

// Type returns ColumnType of JSON.
func (ColJSONStr) Type() ColumnType {
	return ColumnTypeJSON
}

// Rows returns count of rows in column.
func (c ColJSONStr) Rows() int {
	return c.str.Rows()
}

// Reset resets data in column, preserving capacity for efficiency.
func (c *ColJSONStr) Reset() {
	c.str.Reset()
}

// EncodeColumn encodes String rows to *Buffer.
func (c ColJSONStr) EncodeColumn(b *Buffer) {
	b.PutUInt64(JSONStringSerializationVersion)

	c.str.EncodeColumn(b)
}

// WriteColumn writes JSON rows to *Writer.
func (c ColJSONStr) WriteColumn(w *Writer) {
	w.ChainBuffer(func(b *Buffer) {
		b.PutUInt64(JSONStringSerializationVersion)
	})

	c.str.WriteColumn(w)
}

// ForEach calls f on each string from column.
func (c ColJSONStr) ForEach(f func(i int, s string) error) error {
	return c.str.ForEach(f)
}

// First returns the first row of the column.
func (c ColJSONStr) First() string {
	return c.str.First()
}

// Row returns row with number i.
func (c ColJSONStr) Row(i int) string {
	return c.str.Row(i)
}

// RowBytes returns row with number i as byte slice.
func (c ColJSONStr) RowBytes(i int) []byte {
	return c.str.RowBytes(i)
}

// ForEachBytes calls f on each string from column as byte slice.
func (c ColJSONStr) ForEachBytes(f func(i int, b []byte) error) error {
	return c.str.ForEachBytes(f)
}

// DecodeColumn decodes String rows from *Reader.
func (c *ColJSONStr) DecodeColumn(r *Reader, rows int) error {
	jsonSerializationVersion, err := r.UInt64()
	if err != nil {
		return errors.Wrap(err, "failed to read json serialization version")
	}

	if jsonSerializationVersion != JSONStringSerializationVersion {
		return errors.Errorf("received invalid JSON string serialization version %d. Setting \"output_format_native_write_json_as_string\" must be enabled.", jsonSerializationVersion)
	}

	return c.str.DecodeColumn(r, rows)
}

// LowCardinality returns LowCardinality(JSON).
func (c *ColJSONStr) LowCardinality() *ColLowCardinality[string] {
	return c.str.LowCardinality()
}

// Array is a helper that creates Array(JSON).
func (c *ColJSONStr) Array() *ColArr[string] {
	return c.str.Array()
}

// Nullable is a helper that creates Nullable(JSON).
func (c *ColJSONStr) Nullable() *ColNullable[string] {
	return c.str.Nullable()
}

// ColJSONBytes is a ColJSONStr wrapper that implements ColumnOf[[]byte].
type ColJSONBytes struct {
	ColJSONStr
}

// Row returns row with number i.
func (c ColJSONBytes) Row(i int) []byte {
	return c.RowBytes(i)
}

// Append appends a byte slice to the column.
func (c *ColJSONBytes) Append(v []byte) {
	c.AppendBytes(v)
}

// AppendArr appends a slice of byte slices to the column.
func (c *ColJSONBytes) AppendArr(v [][]byte) {
	for _, s := range v {
		c.Append(s)
	}
}

// Array is a helper that creates Array(JSON).
func (c *ColJSONBytes) Array() *ColArr[[]byte] {
	return &ColArr[[]byte]{
		Data: c,
	}
}

// Nullable is a helper that creates Nullable(JSON).
func (c *ColJSONBytes) Nullable() *ColNullable[[]byte] {
	return &ColNullable[[]byte]{
		Values: c,
	}
}
@@ -0,0 +1,150 @@
package proto

import (
	"bytes"
	"io"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/ClickHouse/ch-go/internal/gold"
)

var testJSONValues = []string{
	"{\"x\": 5, \"y\": 10}",
	"{\"a\": \"test\", \"b\": \"test2\"}",
	"{\"a\": \"obj test\", \"b\": {\"c\": 20}}",
}

func TestColJSONBytes(t *testing.T) {
	testColumn(t, "json_bytes", func() ColumnOf[[]byte] {
		return new(ColJSONBytes)
	}, []byte(testJSONValues[0]), []byte(testJSONValues[1]), []byte(testJSONValues[2]))
}

func TestColJSONStr_AppendBytes(t *testing.T) {
	var data ColJSONStr

	data.AppendBytes([]byte(testJSONValues[0]))
	data.AppendBytes([]byte(testJSONValues[1]))
	data.AppendBytes([]byte(testJSONValues[2]))

	var buf Buffer
	data.EncodeColumn(&buf)

	t.Run("Golden", func(t *testing.T) {
		gold.Bytes(t, buf.Buf, "col_json_str_bytes")
	})
	t.Run("Ok", func(t *testing.T) {
		br := bytes.NewReader(buf.Buf)
		r := NewReader(br)

		var dec ColJSONStr
		require.NoError(t, dec.DecodeColumn(r, 3))
		require.Equal(t, data, dec)

		t.Run("ForEach", func(t *testing.T) {
			var output []string
			f := func(i int, s string) error {
				output = append(output, s)
				return nil
			}
			require.NoError(t, dec.ForEach(f))
			require.Equal(t, testJSONValues, output)
		})
	})
}

func TestColJSONStr_EncodeColumn(t *testing.T) {
	var data ColJSONStr

	input := testJSONValues
	rows := len(input)
	for _, s := range input {
		data.Append(s)
	}

	var buf Buffer
	data.EncodeColumn(&buf)

	t.Run("Golden", func(t *testing.T) {
		gold.Bytes(t, buf.Buf, "col_json_str")
	})
	t.Run("Ok", func(t *testing.T) {
		br := bytes.NewReader(buf.Buf)
		r := NewReader(br)

		var dec ColJSONStr
		require.NoError(t, dec.DecodeColumn(r, rows))
		require.Equal(t, data, dec)

		t.Run("ForEach", func(t *testing.T) {
			var output []string
			f := func(i int, s string) error {
				output = append(output, s)
				return nil
			}
			require.NoError(t, dec.ForEach(f))
			require.Equal(t, input, output)
		})
	})
	t.Run("EOF", func(t *testing.T) {
		r := NewReader(bytes.NewReader(nil))

		var dec ColJSONStr
		require.ErrorIs(t, dec.DecodeColumn(r, rows), io.EOF)
	})
}

func BenchmarkColJSONStr_DecodeColumn(b *testing.B) {
	const rows = 1_000
	var data ColJSONStr
	for i := 0; i < rows; i++ {
		data.Append("{\"x\": 5}")
	}

	var buf Buffer
	data.EncodeColumn(&buf)

	br := bytes.NewReader(buf.Buf)
	r := NewReader(br)

	var dec ColJSONStr
	if err := dec.DecodeColumn(r, rows); err != nil {
		b.Fatal(err)
	}

	b.SetBytes(int64(len(buf.Buf)))
	b.ResetTimer()
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		br.Reset(buf.Buf)
		r.raw.Reset(br)
		dec.Reset()

		if err := dec.DecodeColumn(r, rows); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkColJSONStr_EncodeColumn(b *testing.B) {
	const rows = 1_000
	var data ColJSONStr
	for i := 0; i < rows; i++ {
		data.Append("{\"x\": 5}")
	}

	var buf Buffer
	data.EncodeColumn(&buf)

	b.SetBytes(int64(len(buf.Buf)))
	b.ResetTimer()
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		buf.Reset()
		data.EncodeColumn(&buf)
	}
}