Skip to content

Commit

Permalink
[Feature] Add function bitmap_from_binary and bitmap_to_binary (StarR…
Browse files Browse the repository at this point in the history
…ocks#35621)

Signed-off-by: trueeyu <[email protected]>
  • Loading branch information
trueeyu authored Nov 24, 2023
1 parent 81dfaef commit eb4f691
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 8 deletions.
55 changes: 54 additions & 1 deletion be/src/exprs/bitmap_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ StatusOr<ColumnPtr> BitmapFunctions::bitmap_min(FunctionContext* context, const
return builder.build(ColumnHelper::is_all_const(columns));
}

StatusOr<ColumnPtr> BitmapFunctions::base64_to_bitmap(FunctionContext* context, const starrocks::Columns& columns) {
StatusOr<ColumnPtr> BitmapFunctions::base64_to_bitmap(FunctionContext* context, const Columns& columns) {
ColumnViewer<TYPE_VARCHAR> viewer(columns[0]);
size_t size = columns[0]->size();
ColumnBuilder<TYPE_OBJECT> builder(size);
Expand Down Expand Up @@ -675,4 +675,57 @@ StatusOr<ColumnPtr> BitmapFunctions::bitmap_subset_in_range(FunctionContext* con
return builder.build(ColumnHelper::is_all_const(columns));
}

// Serializes every bitmap in columns[0] into its binary representation and
// returns the results as a VARBINARY column.
//
// @param context  function context; not used by this function.
// @param columns  columns[0] is the TYPE_OBJECT (bitmap) input column.
// @return a TYPE_VARBINARY column with one entry per input row. A NULL input row
//         produces a NULL output row. Returns an InternalError status when the
//         built column reports that its capacity limit has been reached.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_to_binary(FunctionContext* context, const starrocks::Columns& columns) {
    ColumnViewer<TYPE_OBJECT> viewer(columns[0]);

    const size_t size = columns[0]->size();
    ColumnBuilder<TYPE_VARBINARY> builder(size);

    // One scratch buffer is reused for all rows; it is resized to the exact
    // serialized size of each bitmap before that bitmap is written into it.
    raw::RawString buf;
    for (size_t row = 0; row < size; ++row) {
        // Propagate NULL instead of serializing whatever value sits behind a
        // NULL row; this matches the NULL handling of bitmap_from_binary.
        if (viewer.is_null(row)) {
            builder.append_null();
            continue;
        }

        BitmapValue* bitmap = viewer.value(row);
        const size_t serialize_size = bitmap->getSizeInBytes();
        buf.resize(serialize_size);
        bitmap->write(buf.data());
        builder.append(Slice(buf.data(), serialize_size));
    }

    ColumnPtr col = builder.build(ColumnHelper::is_all_const(columns));

    // Turn an oversized result column into an explicit error rather than
    // handing it back to the caller.
    std::string err_msg;
    if (col->capacity_limit_reached(&err_msg)) {
        return Status::InternalError(
                strings::Substitute("Size of binary column generated by bitmap_to_binary reaches limit: $0", err_msg));
    }
    return col;
}

// Deserializes every VARBINARY value in columns[0] into a bitmap.
//
// @param context  function context; not used by this function.
// @param columns  columns[0] is the TYPE_VARBINARY input column.
// @return a TYPE_OBJECT (bitmap) column with one entry per input row. A row is
//         NULL in the output when the input row is NULL, when the input is
//         empty (zero bytes), or when BitmapValue::valid_and_deserialize
//         rejects the bytes.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_from_binary(FunctionContext* context, const Columns& columns) {
    ColumnViewer<TYPE_VARBINARY> viewer(columns[0]);
    const size_t size = columns[0]->size();
    ColumnBuilder<TYPE_OBJECT> builder(size);

    // size_t index avoids the signed/unsigned comparison against `size`.
    for (size_t row = 0; row < size; ++row) {
        if (viewer.is_null(row)) {
            builder.append_null();
            continue;
        }

        auto src_value = viewer.value(row);
        // An empty payload is mapped to NULL without attempting to deserialize it.
        if (src_value.size == 0) {
            builder.append_null();
            continue;
        }

        BitmapValue bitmap;
        // Input that fails validation/deserialization yields NULL rather than an error.
        if (bitmap.valid_and_deserialize(src_value.data, src_value.size)) {
            builder.append(std::move(bitmap));
        } else {
            builder.append_null();
        }
    }
    return builder.build(ColumnHelper::is_all_const(columns));
}

} // namespace starrocks
14 changes: 14 additions & 0 deletions be/src/exprs/bitmap_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,20 @@ class BitmapFunctions {
* @return TYPE_OBJECT
*/
DEFINE_VECTORIZED_FN(bitmap_subset_limit);

/**
* @param:
* @paramType columns: [TYPE_BITMAP]
* @return TYPE_VARBINARY
*/
DEFINE_VECTORIZED_FN(bitmap_to_binary);

/**
* @param
* @paramType columns: [TYPE_VARBINARY]
* @return TYPE_BITMAP
*/
DEFINE_VECTORIZED_FN(bitmap_from_binary);
};

} // namespace starrocks
5 changes: 4 additions & 1 deletion be/src/types/bitmap_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,10 @@ class BitmapValue {
private:
void _from_bitmap_to_smaller_type();
void _from_set_to_bitmap();
inline void _copy_on_write() {

// The implementation of this function must be placed in the .h file;
// otherwise it cannot be inlined, which hurts the performance of BitmapValue::add.
ALWAYS_INLINE void _copy_on_write() {
if (UNLIKELY(_bitmap == nullptr)) {
_bitmap = std::make_shared<detail::Roaring64Map>();
return;
Expand Down
14 changes: 8 additions & 6 deletions gensrc/script/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,18 +426,18 @@
[50231, 'to_days', 'INT', ['DATE'], 'TimeFunctions::to_days'],
[50241, 'date_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::datetime_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],
[50242, 'date_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::date_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],

# From string to DATE/DATETIME
# the function will call by FE getStrToDateFunction, and is invisible to user
[50240, 'str_to_date', 'DATETIME', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str_to_date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'],
[50243, 'str2date', 'DATE', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str2date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'],

# Joda Time parse & format
[50244, 'str_to_jodatime', 'DATETIME', ['VARCHAR', 'VARCHAR'],
'TimeFunctions::parse_jodatime',
'TimeFunctions::parse_joda_prepare',
[50244, 'str_to_jodatime', 'DATETIME', ['VARCHAR', 'VARCHAR'],
'TimeFunctions::parse_jodatime',
'TimeFunctions::parse_joda_prepare',
'TimeFunctions::parse_joda_close'],

[50260, 'jodatime_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::jodadatetime_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'],
[50261, 'jodatime_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::jodadate_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'],

Expand Down Expand Up @@ -630,6 +630,8 @@
[91000, 'sub_bitmap', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::sub_bitmap', False],
[91001, 'bitmap_subset_limit', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_limit', False],
[91002, 'bitmap_subset_in_range', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_in_range', False],
[91003, 'bitmap_to_binary', 'VARBINARY', ['BITMAP'], 'BitmapFunctions::bitmap_to_binary', False],
[91004, 'bitmap_from_binary', 'BITMAP', ['VARBINARY'], 'BitmapFunctions::bitmap_from_binary', False],

# hash function
[100010, 'murmur_hash3_32', 'INT', ['VARCHAR', '...'], 'HashFunctions::murmur_hash3_32'],
Expand Down
128 changes: 128 additions & 0 deletions test/sql/test_bitmap_functions/R/test_bitmap_functions
Original file line number Diff line number Diff line change
Expand Up @@ -279,4 +279,132 @@ None
1
1,2
1,2
-- !result
-- name: test_bitmap_binary
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`) BUCKETS 1
PROPERTIES ("replication_num" = "1");
-- result:
-- !result
insert into t1 values (1, bitmap_empty());
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 00
-- !result
select c1, bitmap_count(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 0
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 values (1, to_bitmap(1));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0101000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 values (1, to_bitmap(17179869184));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 030000000004000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 17179869184
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 5));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0A0500000001000000000000000200000000000000030000000000000004000000000000000500000000000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 40));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 023B3000000100002700010001002700
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 20));
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(17179869184, 17179869284));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0402000000003A3000000100000000001300100000000100020003000400050006000700080009000A000B000C000D000E000F0010001100120013001400040000003B3000000100006400010000006400
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,17179869184,17179869185,17179869186,17179869187,17179869188,17179869189,17179869190,17179869191,17179869192,17179869193,17179869194,17179869195,17179869196,17179869197,17179869198,17179869199,17179869200,17179869201,17179869202,17179869203,17179869204,17179869205,17179869206,17179869207,17179869208,17179869209,17179869210,17179869211,17179869212,17179869213,17179869214,17179869215,17179869216,17179869217,17179869218,17179869219,17179869220,17179869221,17179869222,17179869223,17179869224,17179869225,17179869226,17179869227,17179869228,17179869229,17179869230,17179869231,17179869232,17179869233,17179869234,17179869235,17179869236,17179869237,17179869238,17179869239,17179869240,17179869241,17179869242,17179869243,17179869244,17179869245,17179869246,17179869247,17179869248,17179869249,17179869250,17179869251,17179869252,17179869253,17179869254,17179869255,17179869256,17179869257,17179869258,17179869259,17179869260,17179869261,17179869262,17179869263,17179869264,17179869265,17179869266,17179869267,17179869268,17179869269,17179869270,17179869271,17179869272,17179869273,17179869274,17179869275,17179869276,17179869277,17179869278,17179869279,17179869280,17179869281,17179869282,17179869283,17179869284
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 80));
-- result:
-- !result
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(1, 200));
-- result:
-- !result
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(900, 910));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1 order by c1;
-- result:
1 023B3000000100004F00010001004F00
2 023B300000010000D20002000100C70084030A00
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1 order by c1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80
2 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,900,901,902,903,904,905,906,907,908,909,910
-- !result
select bitmap_from_binary(to_binary("1234"));
-- result:
None
-- !result
select bitmap_from_binary(to_binary(""));
-- result:
None
-- !result
select bitmap_from_binary(null);
-- result:
None
-- !result
59 changes: 59 additions & 0 deletions test/sql/test_bitmap_functions/T/test_bitmap_functions
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,62 @@ select bitmap_to_string(bitmap_subset_in_range(v1, 3, 2)) from test_bitmap_table
select bitmap_to_string(bitmap_subset_in_range(v1, 2, 3)) from test_bitmap_table1 order by k1;
select bitmap_to_string(bitmap_subset_in_range(v1, -2, 3)) from test_bitmap_table1 order by k1;

-- name: test_bitmap_binary
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`) BUCKETS 1
PROPERTIES ("replication_num" = "1");

-- empty bitmap
insert into t1 values (1, bitmap_empty());
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_count(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- single 32bit bitmap
truncate table t1;
insert into t1 values (1, to_bitmap(1));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- single 64bit bitmap
truncate table t1;
insert into t1 values (1, to_bitmap(17179869184));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- set bitmap
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 5));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- RoaringBitmap32
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 40));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- RoaringBitmap64
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 20));
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(17179869184, 17179869284));
select c1, hex(bitmap_to_binary(c2)) from t1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;

-- Buf Resize
truncate table t1;
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 80));
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(1, 200));
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(900, 910));
select c1, hex(bitmap_to_binary(c2)) from t1 order by c1;
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1 order by c1;

-- Invalid format
select bitmap_from_binary(to_binary("1234"));
select bitmap_from_binary(to_binary(""));

-- Input is null
select bitmap_from_binary(null);

0 comments on commit eb4f691

Please sign in to comment.