Skip to content

Commit

Permalink
[Feature] Add function bitmap_from_binary and bitmap_to_binary (#35621)
Browse files Browse the repository at this point in the history
Signed-off-by: trueeyu <[email protected]>
(cherry picked from commit eb4f691)

# Conflicts:
#	be/src/exprs/bitmap_functions.cpp
#	be/src/exprs/bitmap_functions.h
#	gensrc/script/functions.py
  • Loading branch information
trueeyu authored and mergify[bot] committed Nov 29, 2023
1 parent 4787f49 commit 2cde96c
Show file tree
Hide file tree
Showing 6 changed files with 389 additions and 2 deletions.
143 changes: 142 additions & 1 deletion be/src/exprs/bitmap_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ StatusOr<ColumnPtr> BitmapFunctions::bitmap_min(FunctionContext* context, const
return builder.build(ColumnHelper::is_all_const(columns));
}

StatusOr<ColumnPtr> BitmapFunctions::base64_to_bitmap(FunctionContext* context, const starrocks::Columns& columns) {
StatusOr<ColumnPtr> BitmapFunctions::base64_to_bitmap(FunctionContext* context, const Columns& columns) {
ColumnViewer<TYPE_VARCHAR> viewer(columns[0]);
size_t size = columns[0]->size();
ColumnBuilder<TYPE_OBJECT> builder(size);
Expand Down Expand Up @@ -598,4 +598,145 @@ StatusOr<ColumnPtr> BitmapFunctions::bitmap_to_base64(FunctionContext* context,
}
return builder.build(ColumnHelper::is_all_const(columns));
}
<<<<<<< HEAD
=======

// Row-wise bitmap_subset_limit(bitmap, range_start, limit).
//
// columns[0] is viewed as TYPE_OBJECT (the bitmap), columns[1] and columns[2]
// as TYPE_BIGINT (range start and limit). The result is a TYPE_OBJECT column.
// A row is NULL when any argument is NULL, when the source bitmap has
// cardinality 0, or when bitmap_subset_limit_internal() returns 0.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_subset_limit(FunctionContext* context, const starrocks::Columns& columns) {
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    ColumnViewer<TYPE_OBJECT> src_viewer(columns[0]);
    ColumnViewer<TYPE_BIGINT> start_viewer(columns[1]);
    ColumnViewer<TYPE_BIGINT> count_viewer(columns[2]);

    size_t num_rows = columns[0]->size();
    ColumnBuilder<TYPE_OBJECT> result(num_rows);

    for (int i = 0; i < num_rows; i++) {
        const bool has_null_arg = src_viewer.is_null(i) || start_viewer.is_null(i) || count_viewer.is_null(i);
        if (has_null_arg) {
            result.append_null();
            continue;
        }

        auto src = src_viewer.value(i);
        auto requested_start = start_viewer.value(i);
        auto count = count_viewer.value(i);

        // A negative start is clamped to 0.
        // TODO: the result of bitmap_subset_limit(bitmap, -1, -1) may be invalid.
        const auto start = requested_start < 0 ? 0 : requested_start;

        // The internal call is only attempted for a non-empty source bitmap;
        // a return value of 0 from it is reported as NULL.
        BitmapValue subset;
        if (src->cardinality() != 0 && src->bitmap_subset_limit_internal(start, count, &subset) != 0) {
            result.append(std::move(subset));
        } else {
            result.append_null();
        }
    }

    return result.build(ColumnHelper::is_all_const(columns));
}

// Row-wise bitmap_subset_in_range(bitmap, range_start, range_end).
//
// columns[0] is viewed as TYPE_OBJECT (the bitmap), columns[1] and columns[2]
// as TYPE_BIGINT (range start and range end). The result is a TYPE_OBJECT
// column. A row is NULL when any argument is NULL, when the source bitmap has
// cardinality 0, when the (clamped) start is not below the end, or when
// bitmap_subset_in_range_internal() returns 0.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_subset_in_range(FunctionContext* context,
                                                            const starrocks::Columns& columns) {
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    ColumnViewer<TYPE_OBJECT> src_viewer(columns[0]);
    ColumnViewer<TYPE_BIGINT> lower_viewer(columns[1]);
    ColumnViewer<TYPE_BIGINT> upper_viewer(columns[2]);

    size_t num_rows = columns[0]->size();
    ColumnBuilder<TYPE_OBJECT> result(num_rows);

    for (int i = 0; i < num_rows; i++) {
        if (src_viewer.is_null(i) || lower_viewer.is_null(i) || upper_viewer.is_null(i)) {
            result.append_null();
            continue;
        }

        auto src = src_viewer.value(i);
        auto lower = lower_viewer.value(i);
        auto upper = upper_viewer.value(i);

        // A negative start is clamped to 0 before the range is validated.
        if (lower < 0) {
            lower = 0;
        }

        // Only a non-empty bitmap with start < end is handed to the internal
        // routine; every other case, and a 0 return from it, yields NULL.
        const bool worth_scanning = src->cardinality() != 0 && lower < upper;

        BitmapValue subset;
        if (worth_scanning && src->bitmap_subset_in_range_internal(lower, upper, &subset) != 0) {
            result.append(std::move(subset));
        } else {
            result.append_null();
        }
    }

    return result.build(ColumnHelper::is_all_const(columns));
}

// Row-wise bitmap_to_binary(bitmap): serializes each bitmap of columns[0]
// (viewed as TYPE_OBJECT) with BitmapValue::write() and emits the bytes as a
// TYPE_VARBINARY value.
//
// A NULL input row produces a NULL output row. Returns an InternalError when
// the built column reports that its capacity limit has been reached.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_to_binary(FunctionContext* context, const starrocks::Columns& columns) {
    ColumnViewer<TYPE_OBJECT> viewer(columns[0]);

    const size_t size = columns[0]->size();
    ColumnBuilder<TYPE_VARBINARY> builder(size);

    // Scratch buffer shared by all rows; it is resized per row and its
    // contents are passed to the builder via a Slice before the next resize.
    raw::RawString buf;
    for (size_t row = 0; row < size; ++row) {
        // NULL in, NULL out: do not serialize the placeholder value of a NULL row.
        if (viewer.is_null(row)) {
            builder.append_null();
            continue;
        }

        BitmapValue* bitmap = viewer.value(row);
        const size_t serialize_size = bitmap->getSizeInBytes();
        buf.resize(serialize_size);
        bitmap->write(buf.data());
        builder.append(Slice(buf.data(), serialize_size));
    }

    ColumnPtr col = builder.build(ColumnHelper::is_all_const(columns));
    std::string err_msg;
    if (col->capacity_limit_reached(&err_msg)) {
        return Status::InternalError(
                strings::Substitute("Size of binary column generated by bitmap_to_binary reaches limit: $0", err_msg));
    }
    return col;
}

// Row-wise bitmap_from_binary(binary): rebuilds a bitmap from each value of
// columns[0] (viewed as TYPE_VARBINARY) and returns a TYPE_OBJECT column.
//
// A row is NULL when the input is NULL, when the input is zero-length, or
// when BitmapValue::valid_and_deserialize() rejects the bytes.
StatusOr<ColumnPtr> BitmapFunctions::bitmap_from_binary(FunctionContext* context, const Columns& columns) {
    ColumnViewer<TYPE_VARBINARY> input(columns[0]);
    size_t num_rows = columns[0]->size();
    ColumnBuilder<TYPE_OBJECT> output(num_rows);

    for (int i = 0; i < num_rows; ++i) {
        if (input.is_null(i)) {
            output.append_null();
            continue;
        }

        const auto bytes = input.value(i);

        // Empty payloads are never handed to the deserializer.
        BitmapValue decoded;
        if (bytes.size != 0 && decoded.valid_and_deserialize(bytes.data, bytes.size)) {
            output.append(std::move(decoded));
        } else {
            output.append_null();
        }
    }
    return output.build(ColumnHelper::is_all_const(columns));
}

>>>>>>> eb4f6912b2 ([Feature] Add function bitmap_from_binary and bitmap_to_binary (#35621))
} // namespace starrocks
31 changes: 31 additions & 0 deletions be/src/exprs/bitmap_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,37 @@ class BitmapFunctions {
* @return TYPE_OBJECT
*/
DEFINE_VECTORIZED_FN(bitmap_to_base64);
<<<<<<< HEAD
=======

/**
* Builds, per row, a sub-bitmap of the input bitmap from a range start and a range end.
* The row is NULL if any argument is NULL, the bitmap is empty, or start >= end
* (a negative start is treated as 0).
* @param: columns[0] bitmap, columns[1] range start, columns[2] range end
* @paramType columns: [TYPE_OBJECT, TYPE_BIGINT, TYPE_BIGINT]
* @return TYPE_OBJECT
*/
DEFINE_VECTORIZED_FN(bitmap_subset_in_range);

/**
* Builds, per row, a sub-bitmap of the input bitmap from a range start and a limit.
* The row is NULL if any argument is NULL or the bitmap is empty
* (a negative start is treated as 0).
* @param: columns[0] bitmap, columns[1] range start, columns[2] limit
* @paramType columns: [TYPE_OBJECT, TYPE_BIGINT, TYPE_BIGINT]
* @return TYPE_OBJECT
*/
DEFINE_VECTORIZED_FN(bitmap_subset_limit);

/**
* Serializes each bitmap into its binary representation.
* @param: columns[0] bitmap to serialize
* @paramType columns: [TYPE_OBJECT]
* @return TYPE_VARBINARY
*/
DEFINE_VECTORIZED_FN(bitmap_to_binary);

/**
* Deserializes each binary value into a bitmap; the row is NULL when the input
* is NULL, empty, or fails validation.
* @param: columns[0] binary value to deserialize
* @paramType columns: [TYPE_VARBINARY]
* @return TYPE_OBJECT
*/
DEFINE_VECTORIZED_FN(bitmap_from_binary);
>>>>>>> eb4f6912b2 ([Feature] Add function bitmap_from_binary and bitmap_to_binary (#35621))
};

} // namespace starrocks
5 changes: 4 additions & 1 deletion be/src/types/bitmap_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,10 @@ class BitmapValue {
private:
void _from_bitmap_to_smaller_type();
void _from_set_to_bitmap();
inline void _copy_on_write() {

// This function must be implemented in the header (.h) file;
// otherwise it cannot be inlined, which hurts the performance of BitmapValue::add.
ALWAYS_INLINE void _copy_on_write() {
if (UNLIKELY(_bitmap == nullptr)) {
_bitmap = std::make_shared<detail::Roaring64Map>();
return;
Expand Down
25 changes: 25 additions & 0 deletions gensrc/script/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,14 +417,32 @@
[50221, 'current_date', 'DATE', [], 'TimeFunctions::curdate'],
[50230, 'from_days', 'DATE', ['INT'], 'TimeFunctions::from_days'],
[50231, 'to_days', 'INT', ['DATE'], 'TimeFunctions::to_days'],
<<<<<<< HEAD
=======
[50241, 'date_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::datetime_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],
[50242, 'date_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::date_format', 'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],

# From string to DATE/DATETIME
# the function will call by FE getStrToDateFunction, and is invisible to user
>>>>>>> eb4f6912b2 ([Feature] Add function bitmap_from_binary and bitmap_to_binary (#35621))
[50240, 'str_to_date', 'DATETIME', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str_to_date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'],
[50241, 'date_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::datetime_format',
'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],
[50242, 'date_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::date_format',
'TimeFunctions::format_prepare', 'TimeFunctions::format_close'],
# cast string to date, the function will call by FE getStrToDateFunction, and is invisible to user
[50243, 'str2date', 'DATE', ['VARCHAR', 'VARCHAR'], 'TimeFunctions::str2date', 'TimeFunctions::str_to_date_prepare', 'TimeFunctions::str_to_date_close'],
<<<<<<< HEAD
[50250, 'time_to_sec', 'BIGINT', ['TIME'], 'TimeFunctions::time_to_sec'],
=======

# Joda Time parse & format
[50244, 'str_to_jodatime', 'DATETIME', ['VARCHAR', 'VARCHAR'],
'TimeFunctions::parse_jodatime',
'TimeFunctions::parse_joda_prepare',
'TimeFunctions::parse_joda_close'],

>>>>>>> eb4f6912b2 ([Feature] Add function bitmap_from_binary and bitmap_to_binary (#35621))
[50260, 'jodatime_format', 'VARCHAR', ['DATETIME', 'VARCHAR'], 'TimeFunctions::jodadatetime_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'],
[50261, 'jodatime_format', 'VARCHAR', ['DATE', 'VARCHAR'], 'TimeFunctions::jodadate_format', 'TimeFunctions::jodatime_format_prepare', 'TimeFunctions::jodatime_format_close'],

Expand Down Expand Up @@ -585,6 +603,13 @@
[90801, 'bitmap_to_base64', 'VARCHAR', ['BITMAP'], 'BitmapFunctions::bitmap_to_base64', False],
[90900, 'array_to_bitmap', 'BITMAP', ['ARRAY_BIGINT'], 'BitmapFunctions::array_to_bitmap', False],
[91000, 'sub_bitmap', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::sub_bitmap', False],
<<<<<<< HEAD
=======
[91001, 'bitmap_subset_limit', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_limit', False],
[91002, 'bitmap_subset_in_range', 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], 'BitmapFunctions::bitmap_subset_in_range', False],
[91003, 'bitmap_to_binary', 'VARBINARY', ['BITMAP'], 'BitmapFunctions::bitmap_to_binary', False],
[91004, 'bitmap_from_binary', 'BITMAP', ['VARBINARY'], 'BitmapFunctions::bitmap_from_binary', False],
>>>>>>> eb4f6912b2 ([Feature] Add function bitmap_from_binary and bitmap_to_binary (#35621))

# hash function
[100010, 'murmur_hash3_32', 'INT', ['VARCHAR', '...'], 'HashFunctions::murmur_hash3_32'],
Expand Down
128 changes: 128 additions & 0 deletions test/sql/test_bitmap_functions/R/test_bitmap_functions
Original file line number Diff line number Diff line change
Expand Up @@ -279,4 +279,132 @@ None
1
1,2
1,2
-- !result
-- name: test_bitmap_binary
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`) BUCKETS 1
PROPERTIES ("replication_num" = "1");
-- result:
-- !result
insert into t1 values (1, bitmap_empty());
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 00
-- !result
select c1, bitmap_count(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 0
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 values (1, to_bitmap(1));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0101000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 values (1, to_bitmap(17179869184));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 030000000004000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 17179869184
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 5));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0A0500000001000000000000000200000000000000030000000000000004000000000000000500000000000000
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 40));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 023B3000000100002700010001002700
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 20));
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(17179869184, 17179869284));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1;
-- result:
1 0402000000003A3000000100000000001300100000000100020003000400050006000700080009000A000B000C000D000E000F0010001100120013001400040000003B3000000100006400010000006400
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,17179869184,17179869185,17179869186,17179869187,17179869188,17179869189,17179869190,17179869191,17179869192,17179869193,17179869194,17179869195,17179869196,17179869197,17179869198,17179869199,17179869200,17179869201,17179869202,17179869203,17179869204,17179869205,17179869206,17179869207,17179869208,17179869209,17179869210,17179869211,17179869212,17179869213,17179869214,17179869215,17179869216,17179869217,17179869218,17179869219,17179869220,17179869221,17179869222,17179869223,17179869224,17179869225,17179869226,17179869227,17179869228,17179869229,17179869230,17179869231,17179869232,17179869233,17179869234,17179869235,17179869236,17179869237,17179869238,17179869239,17179869240,17179869241,17179869242,17179869243,17179869244,17179869245,17179869246,17179869247,17179869248,17179869249,17179869250,17179869251,17179869252,17179869253,17179869254,17179869255,17179869256,17179869257,17179869258,17179869259,17179869260,17179869261,17179869262,17179869263,17179869264,17179869265,17179869266,17179869267,17179869268,17179869269,17179869270,17179869271,17179869272,17179869273,17179869274,17179869275,17179869276,17179869277,17179869278,17179869279,17179869280,17179869281,17179869282,17179869283,17179869284
-- !result
truncate table t1;
-- result:
-- !result
insert into t1 select 1, bitmap_agg(generate_series) from table(generate_series(1, 80));
-- result:
-- !result
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(1, 200));
-- result:
-- !result
insert into t1 select 2, bitmap_agg(generate_series) from table(generate_series(900, 910));
-- result:
-- !result
select c1, hex(bitmap_to_binary(c2)) from t1 order by c1;
-- result:
1 023B3000000100004F00010001004F00
2 023B300000010000D20002000100C70084030A00
-- !result
select c1, bitmap_to_string(bitmap_from_binary(bitmap_to_binary(c2))) from t1 order by c1;
-- result:
1 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80
2 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,900,901,902,903,904,905,906,907,908,909,910
-- !result
select bitmap_from_binary(to_binary("1234"));
-- result:
None
-- !result
select bitmap_from_binary(to_binary(""));
-- result:
None
-- !result
select bitmap_from_binary(null);
-- result:
None
-- !result
Loading

0 comments on commit 2cde96c

Please sign in to comment.