From ff59bb98cd0e95e03e85f8cd02a72ec6011f22e4 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Wed, 2 Aug 2023 00:35:55 -0400 Subject: [PATCH 1/2] Fix double expression evaluation on `view()` --- .../src/cpp/context_grouped_pkey.cpp | 8 --- cpp/perspective/src/cpp/context_one.cpp | 8 --- cpp/perspective/src/cpp/context_two.cpp | 8 --- cpp/perspective/src/cpp/context_zero.cpp | 8 --- cpp/perspective/src/cpp/expression_tables.cpp | 13 +++++ cpp/perspective/src/cpp/gnode.cpp | 55 +++++++++++++++---- cpp/perspective/src/cpp/gnode_state.cpp | 21 ++++--- .../perspective/context_common_decls.h | 1 - .../include/perspective/expression_tables.h | 2 + .../src/include/perspective/gnode_state.h | 2 + tools/perspective-bench/src/js/worker.js | 25 +++++++++ 11 files changed, 97 insertions(+), 54 deletions(-) diff --git a/cpp/perspective/src/cpp/context_grouped_pkey.cpp b/cpp/perspective/src/cpp/context_grouped_pkey.cpp index 46400da521..486f9dbed0 100644 --- a/cpp/perspective/src/cpp/context_grouped_pkey.cpp +++ b/cpp/perspective/src/cpp/context_grouped_pkey.cpp @@ -684,7 +684,6 @@ t_ctx_grouped_pkey::get_column_dtype(t_uindex idx) const { void t_ctx_grouped_pkey::compute_expressions(std::shared_ptr master, - std::shared_ptr flattened, t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) { // Clear the transitional expression tables on the context so they are // ready for the next update. @@ -693,10 +692,6 @@ t_ctx_grouped_pkey::compute_expressions(std::shared_ptr master, std::shared_ptr master_expression_table = m_expression_tables->m_master; - t_uindex flattened_num_rows = flattened->size(); - m_expression_tables->reserve_transitional_table_size(flattened_num_rows); - m_expression_tables->set_transitional_table_size(flattened_num_rows); - // Set the master table to the right size. t_uindex num_rows = master->size(); master_expression_table->reserve(num_rows); @@ -707,9 +702,6 @@ t_ctx_grouped_pkey::compute_expressions(std::shared_ptr master, // Compute the expressions on the master table. expr->compute( master, master_expression_table, expression_vocab, regex_mapping); - - expr->compute(flattened, m_expression_tables->m_flattened, - expression_vocab, regex_mapping); } } diff --git a/cpp/perspective/src/cpp/context_one.cpp b/cpp/perspective/src/cpp/context_one.cpp index 0137c45d3c..7a01a7d6d0 100644 --- a/cpp/perspective/src/cpp/context_one.cpp +++ b/cpp/perspective/src/cpp/context_one.cpp @@ -612,7 +612,6 @@ t_ctx1::get_trav_depth(t_index idx) const { void t_ctx1::compute_expressions(std::shared_ptr master, - std::shared_ptr flattened, t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) { // Clear the transitional expression tables on the context so they are // ready for the next update. @@ -621,10 +620,6 @@ t_ctx1::compute_expressions(std::shared_ptr master, std::shared_ptr master_expression_table = m_expression_tables->m_master; - t_uindex flattened_num_rows = flattened->size(); - m_expression_tables->reserve_transitional_table_size(flattened_num_rows); - m_expression_tables->set_transitional_table_size(flattened_num_rows); - // Set the master table to the right size. t_uindex num_rows = master->size(); master_expression_table->reserve(num_rows); @@ -635,9 +630,6 @@ t_ctx1::compute_expressions(std::shared_ptr master, // Compute the expressions on the master table. expr->compute( master, master_expression_table, expression_vocab, regex_mapping); - - expr->compute(flattened, m_expression_tables->m_flattened, - expression_vocab, regex_mapping); } } diff --git a/cpp/perspective/src/cpp/context_two.cpp b/cpp/perspective/src/cpp/context_two.cpp index 66d52f3b4b..877d404eaf 100644 --- a/cpp/perspective/src/cpp/context_two.cpp +++ b/cpp/perspective/src/cpp/context_two.cpp @@ -1054,7 +1054,6 @@ t_ctx2::get_column_dtype(t_uindex idx) const { void t_ctx2::compute_expressions(std::shared_ptr master, - std::shared_ptr flattened, t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) { // Clear the transitional expression tables on the context so they are // ready for the next update. @@ -1063,10 +1062,6 @@ t_ctx2::compute_expressions(std::shared_ptr master, std::shared_ptr master_expression_table = m_expression_tables->m_master; - t_uindex flattened_num_rows = flattened->size(); - m_expression_tables->reserve_transitional_table_size(flattened_num_rows); - m_expression_tables->set_transitional_table_size(flattened_num_rows); - // Set the master table to the right size. t_uindex num_rows = master->size(); master_expression_table->reserve(num_rows); @@ -1077,9 +1072,6 @@ t_ctx2::compute_expressions(std::shared_ptr master, // Compute the expressions on the master table. expr->compute( master, master_expression_table, expression_vocab, regex_mapping); - - expr->compute(flattened, m_expression_tables->m_flattened, - expression_vocab, regex_mapping); } } diff --git a/cpp/perspective/src/cpp/context_zero.cpp b/cpp/perspective/src/cpp/context_zero.cpp index ee5257094b..a0dc9acd8f 100644 --- a/cpp/perspective/src/cpp/context_zero.cpp +++ b/cpp/perspective/src/cpp/context_zero.cpp @@ -620,7 +620,6 @@ t_ctx0::get_step_delta(t_index bidx, t_index eidx) { void t_ctx0::compute_expressions(std::shared_ptr master, - std::shared_ptr flattened, t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) { // Clear the transitional expression tables on the context so they are // ready for the next update. @@ -629,10 +628,6 @@ t_ctx0::compute_expressions(std::shared_ptr master, std::shared_ptr master_expression_table = m_expression_tables->m_master; - t_uindex flattened_num_rows = flattened->size(); - m_expression_tables->reserve_transitional_table_size(flattened_num_rows); - m_expression_tables->set_transitional_table_size(flattened_num_rows); - // Set the master table to the right size. t_uindex num_rows = master->size(); master_expression_table->reserve(num_rows); @@ -643,9 +638,6 @@ t_ctx0::compute_expressions(std::shared_ptr master, // Compute the expressions on the master table. expr->compute( master, master_expression_table, expression_vocab, regex_mapping); - - expr->compute(flattened, m_expression_tables->m_flattened, - expression_vocab, regex_mapping); } } diff --git a/cpp/perspective/src/cpp/expression_tables.cpp b/cpp/perspective/src/cpp/expression_tables.cpp index 531336b353..ec489e37a1 100644 --- a/cpp/perspective/src/cpp/expression_tables.cpp +++ b/cpp/perspective/src/cpp/expression_tables.cpp @@ -51,6 +51,19 @@ t_expression_tables::get_table() const { return m_master.get(); } +void +t_expression_tables::set_flattened(std::shared_ptr flattened) { + t_uindex flattened_num_rows = flattened->size(); + reserve_transitional_table_size(flattened_num_rows); + set_transitional_table_size(flattened_num_rows); + const t_schema& schema = m_flattened->get_schema(); + const std::vector& column_names = schema.m_columns; + for (const auto& colname : column_names) { + m_flattened->set_column( + colname, flattened->get_column(colname)->clone()); + } +} + void t_expression_tables::calculate_transitions( std::shared_ptr existed) { diff --git a/cpp/perspective/src/cpp/gnode.cpp b/cpp/perspective/src/cpp/gnode.cpp index 2650a55282..131bb6b1c1 100644 --- a/cpp/perspective/src/cpp/gnode.cpp +++ b/cpp/perspective/src/cpp/gnode.cpp @@ -871,8 +871,13 @@ t_gnode::_register_context( ctx->reset(); if (should_update) { - ctx->compute_expressions(m_gstate->get_table(), pkeyed_table, + ctx->compute_expressions(m_gstate->get_table(), expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); + update_context_from_state(ctx, name, pkeyed_table); } } break; @@ -881,8 +886,12 @@ t_gnode::_register_context( t_ctx1* ctx = static_cast(ptr_); ctx->reset(); if (should_update) { - ctx->compute_expressions(m_gstate->get_table(), pkeyed_table, + ctx->compute_expressions(m_gstate->get_table(), expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); update_context_from_state(ctx, name, pkeyed_table); } @@ -892,8 +901,13 @@ t_gnode::_register_context( t_ctx0* ctx = static_cast(ptr_); ctx->reset(); if (should_update) { - ctx->compute_expressions(m_gstate->get_table(), pkeyed_table, + ctx->compute_expressions(m_gstate->get_table(), expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); + update_context_from_state(ctx, name, pkeyed_table); } } break; @@ -913,8 +927,13 @@ t_gnode::_register_context( ctx->reset(); if (should_update) { - ctx->compute_expressions(m_gstate->get_table(), pkeyed_table, + ctx->compute_expressions(m_gstate->get_table(), expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); + update_context_from_state( ctx, name, pkeyed_table); } @@ -999,27 +1018,39 @@ t_gnode::_compute_expressions(std::shared_ptr flattened_masked) { case TWO_SIDED_CONTEXT: { t_ctx2* ctx = static_cast(ctxh.m_ctx); ctx->compute_expressions(m_gstate->get_table(), - flattened_masked, expression_vocab, - expression_regex_mapping); + expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); } break; case ONE_SIDED_CONTEXT: { t_ctx1* ctx = static_cast(ctxh.m_ctx); ctx->compute_expressions(m_gstate->get_table(), - flattened_masked, expression_vocab, - expression_regex_mapping); + expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); } break; case ZERO_SIDED_CONTEXT: { t_ctx0* ctx = static_cast(ctxh.m_ctx); ctx->compute_expressions(m_gstate->get_table(), - flattened_masked, expression_vocab, - expression_regex_mapping); + expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); } break; case GROUPED_PKEY_CONTEXT: { t_ctx_grouped_pkey* ctx = static_cast(ctxh.m_ctx); ctx->compute_expressions(m_gstate->get_table(), - flattened_masked, expression_vocab, - expression_regex_mapping); + expression_vocab, expression_regex_mapping); + ctx->get_expression_tables()->set_flattened( + m_gstate->get_pkeyed_table( + ctx->get_expression_tables()->m_master->get_schema(), + ctx->get_expression_tables()->m_master)); } break; case UNIT_CONTEXT: break; diff --git a/cpp/perspective/src/cpp/gnode_state.cpp b/cpp/perspective/src/cpp/gnode_state.cpp index 55920d46e2..4c82616c9c 100644 --- a/cpp/perspective/src/cpp/gnode_state.cpp +++ b/cpp/perspective/src/cpp/gnode_state.cpp @@ -547,10 +547,16 @@ t_gstate::get_pkey_dtype() const { std::shared_ptr t_gstate::get_pkeyed_table() const { + return get_pkeyed_table(m_input_schema, m_table); +} + +std::shared_ptr +t_gstate::get_pkeyed_table( + const t_schema& schema, const std::shared_ptr table) const { // If there are no removes, just return the gstate table. Removes would // cause m_mapping to be smaller than m_table. - if (m_mapping.size() == m_table->size()) - return m_table; + if (m_mapping.size() == table->size()) + return table; // Otherwise mask out the removed rows and return the table. auto mask = get_cpp_mask(); @@ -558,22 +564,19 @@ t_gstate::get_pkeyed_table() const { // count = total number of rows - number of removed rows t_uindex table_size = mask.count(); - const auto& schema_columns = m_input_schema.m_columns; + const auto& schema_columns = schema.m_columns; t_uindex num_columns = schema_columns.size(); - // Clone from the gstate master table - const std::shared_ptr& master_table = m_table; - std::shared_ptr rval - = std::make_shared(m_input_schema, table_size); + = std::make_shared(schema, table_size); rval->init(); rval->set_size(table_size); parallel_for(int(num_columns), - [&schema_columns, rval, master_table, &mask](int colidx) { + [&schema_columns, rval, table, &mask](int colidx) { const std::string& colname = schema_columns[colidx]; rval->set_column( - colname, master_table->get_const_column(colname)->clone(mask)); + colname, table->get_const_column(colname)->clone(mask)); } ); diff --git a/cpp/perspective/src/include/perspective/context_common_decls.h b/cpp/perspective/src/include/perspective/context_common_decls.h index 83d9a33d2d..c8224e5970 100644 --- a/cpp/perspective/src/include/perspective/context_common_decls.h +++ b/cpp/perspective/src/include/perspective/context_common_decls.h @@ -97,7 +97,6 @@ std::shared_ptr get_expression_tables() const; // Given shared pointers to data tables from the gnode, use them to // compute the results of expression columns. void compute_expressions(std::shared_ptr master, - std::shared_ptr flattened_masked, t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping); void compute_expressions(std::shared_ptr master, diff --git a/cpp/perspective/src/include/perspective/expression_tables.h b/cpp/perspective/src/include/perspective/expression_tables.h index 2d09bb8317..8ab5060615 100644 --- a/cpp/perspective/src/include/perspective/expression_tables.h +++ b/cpp/perspective/src/include/perspective/expression_tables.h @@ -50,6 +50,8 @@ struct t_expression_tables { // Calculate the `t_transitions` value for each row. void calculate_transitions(std::shared_ptr existed); + void set_flattened(std::shared_ptr flattened); + void reset(); t_data_table* get_table() const; diff --git a/cpp/perspective/src/include/perspective/gnode_state.h b/cpp/perspective/src/include/perspective/gnode_state.h index c450340774..2435e865c3 100644 --- a/cpp/perspective/src/include/perspective/gnode_state.h +++ b/cpp/perspective/src/include/perspective/gnode_state.h @@ -199,6 +199,8 @@ class PERSPECTIVE_EXPORT t_gstate { // Getters std::shared_ptr get_table() const; std::shared_ptr get_pkeyed_table() const; + std::shared_ptr get_pkeyed_table(const t_schema& schema, + const std::shared_ptr table) const; const t_schema& get_input_schema() const; const t_schema& get_output_schema() const; diff --git a/tools/perspective-bench/src/js/worker.js b/tools/perspective-bench/src/js/worker.js index b7e37562f4..51cfd47f49 100644 --- a/tools/perspective-bench/src/js/worker.js +++ b/tools/perspective-bench/src/js/worker.js @@ -181,6 +181,31 @@ async function view_suite() { }, }); + await benchmark({ + name: `.view({expressions: ["Sales" + "Profit"]})`, + before_all, + after_all, + after, + async test(perspective, { table, schema }) { + const [M, m, _] = perspective.version; + // const columns = Object.keys(schema); + columns = [`"Sales" + "Profit"`]; + if ((M === 1 && m >= 2) || M === 2) { + return await table.view({ + columns, + group_by: ["Product Name"], + expressions: [`"Sales" + "Profit"`], + }); + } else { + return await table.view({ + columns, + row_pivots: ["Product Name"], + expressions: [`"Sales" + "Profit"`], + }); + } + }, + }); + await benchmark({ name: `.view({group_by, aggregates: "median"})`, before_all, From eca996ad7ca57c848a29c706d301ca16d3d3e67b Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Sun, 23 Jul 2023 11:48:10 -0400 Subject: [PATCH 2/2] Fix GIL release for `to_columns_string()` --- cpp/perspective/src/cpp/view.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index 74b83a524f..9e9c718f40 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -1580,10 +1580,10 @@ View::to_columns(t_uindex start_row, t_uindex end_row, bool get_pkeys, bool get_ids, bool _leaves_only, t_uindex num_sides, bool _has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { - + PSP_GIL_UNLOCK(); + PSP_READ_LOCK(get_lock()); auto slice = get_data(start_row, end_row, start_col, end_col); - auto col_names = slice->get_column_names(); - auto schema = m_ctx->get_schema(); + auto& col_names = slice->get_column_names(); rapidjson::StringBuffer s; rapidjson::Writer writer(s); @@ -1628,9 +1628,12 @@ View::to_columns(t_uindex start_row, t_uindex end_row, bool get_pkeys, bool get_ids, bool _leaves_only, t_uindex num_sides, bool _has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { + PSP_GIL_UNLOCK(); + PSP_READ_LOCK(get_lock()); auto slice = get_data(start_row, end_row, start_col, end_col); - auto col_names = slice->get_column_names(); - auto schema = m_ctx->get_schema(); + const std::vector>& col_names + = slice->get_column_names(); + rapidjson::StringBuffer s; rapidjson::Writer writer(s); writer.StartObject(); @@ -1671,8 +1674,11 @@ View::to_columns(t_uindex start_row, t_uindex end_row, bool get_pkeys, bool get_ids, bool leaves_only, t_uindex num_sides, bool has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { + PSP_GIL_UNLOCK(); + PSP_READ_LOCK(get_lock()); + auto slice = get_data(start_row, end_row, start_col, end_col); - auto col_names = slice->get_column_names(); + const auto& col_names = slice->get_column_names(); rapidjson::StringBuffer s; rapidjson::Writer writer(s); writer.StartObject(); @@ -1721,8 +1727,10 @@ View::to_columns(t_uindex start_row, t_uindex end_row, bool get_pkeys, bool get_ids, bool leaves_only, t_uindex num_sides, bool has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { - auto slice = get_data(start_row, end_row, start_col, end_col); - auto col_names = slice->get_column_names(); + PSP_GIL_UNLOCK(); + PSP_READ_LOCK(get_lock()); + const auto slice = get_data(start_row, end_row, start_col, end_col); + const auto& col_names = slice->get_column_names(); rapidjson::StringBuffer s; rapidjson::Writer writer(s); writer.StartObject();