Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix expression column aggregate calculation with group_by after remove() #2311

Merged
merged 1 commit into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions cpp/perspective/src/cpp/context_grouped_pkey.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,8 +683,8 @@ t_ctx_grouped_pkey::get_column_dtype(t_uindex idx) const {
}

void
t_ctx_grouped_pkey::compute_expressions(
std::shared_ptr<t_data_table> flattened_masked,
t_ctx_grouped_pkey::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -693,14 +693,22 @@ t_ctx_grouped_pkey::compute_expressions(
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
master_expression_table->reserve(flattened_masked->size());
master_expression_table->set_size(flattened_masked->size());
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
14 changes: 11 additions & 3 deletions cpp/perspective/src/cpp/context_one.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,8 @@ t_ctx1::get_trav_depth(t_index idx) const {
}

void
t_ctx1::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
t_ctx1::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -620,15 +621,22 @@ t_ctx1::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
t_uindex num_rows = flattened_masked->size();
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
14 changes: 11 additions & 3 deletions cpp/perspective/src/cpp/context_two.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,8 @@ t_ctx2::get_column_dtype(t_uindex idx) const {
}

void
t_ctx2::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
t_ctx2::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -1062,15 +1063,22 @@ t_ctx2::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
t_uindex num_rows = flattened_masked->size();
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
14 changes: 11 additions & 3 deletions cpp/perspective/src/cpp/context_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,8 @@ t_ctx0::get_step_delta(t_index bidx, t_index eidx) {
}

void
t_ctx0::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
t_ctx0::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -628,15 +629,22 @@ t_ctx0::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
t_uindex num_rows = flattened_masked->size();
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
5 changes: 5 additions & 0 deletions cpp/perspective/src/cpp/expression_tables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ t_expression_tables::t_expression_tables(
m_transitions->init();
}

t_data_table*
t_expression_tables::get_table() const {
return m_master.get();
}

void
t_expression_tables::calculate_transitions(
std::shared_ptr<t_data_table> existed) {
Expand Down
31 changes: 17 additions & 14 deletions cpp/perspective/src/cpp/gnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -871,30 +871,29 @@ t_gnode::_register_context(
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);
update_context_from_state<t_ctx2>(ctx, name, pkeyed_table);
}
} break;
case ONE_SIDED_CONTEXT: {
set_ctx_state<t_ctx1>(ptr_);
t_ctx1* ctx = static_cast<t_ctx1*>(ptr_);
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);

update_context_from_state<t_ctx1>(ctx, name, pkeyed_table);
}
} break;
case ZERO_SIDED_CONTEXT: {
set_ctx_state<t_ctx0>(ptr_);
t_ctx0* ctx = static_cast<t_ctx0*>(ptr_);
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);
update_context_from_state<t_ctx0>(ctx, name, pkeyed_table);
}
} break;
Expand All @@ -914,8 +913,8 @@ t_gnode::_register_context(
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);
update_context_from_state<t_ctx_grouped_pkey>(
ctx, name, pkeyed_table);
}
Expand Down Expand Up @@ -999,23 +998,27 @@ t_gnode::_compute_expressions(std::shared_ptr<t_data_table> flattened_masked) {
switch (ctxh.get_type()) {
case TWO_SIDED_CONTEXT: {
t_ctx2* ctx = static_cast<t_ctx2*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case ONE_SIDED_CONTEXT: {
t_ctx1* ctx = static_cast<t_ctx1*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case ZERO_SIDED_CONTEXT: {
t_ctx0* ctx = static_cast<t_ctx0*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case GROUPED_PKEY_CONTEXT: {
t_ctx_grouped_pkey* ctx
= static_cast<t_ctx_grouped_pkey*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case UNIT_CONTEXT:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ std::shared_ptr<t_expression_tables> get_expression_tables() const;

// Given shared pointers to data tables from the gnode, use them to
// compute the results of expression columns.
void compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
void compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened_masked,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping);

void compute_expressions(std::shared_ptr<t_data_table> master,
Expand Down
2 changes: 2 additions & 0 deletions cpp/perspective/src/include/perspective/expression_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ struct t_expression_tables {

void reset();

t_data_table* get_table() const;

// master table is calculated from t_gstate's master table
std::shared_ptr<t_data_table> m_master;

Expand Down
2 changes: 1 addition & 1 deletion cpp/perspective/src/include/perspective/gnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ t_gnode::update_context_from_state(CTX_T* ctx, const std::string& name,
std::shared_ptr<t_expression_tables> ctx_expression_tables
= ctx->get_expression_tables();
std::shared_ptr<t_data_table> joined_flattened
= flattened->join(ctx_expression_tables->m_master);
= flattened->join(ctx_expression_tables->m_flattened);
ctx->notify(*joined_flattened);
} else {
// Just use the table from the gnode
Expand Down
66 changes: 64 additions & 2 deletions python/perspective/perspective/tests/table/test_remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@


class TestRemove(object):

def test_remove_all(self):
tbl = Table([{"a": "abc", "b": 123}], index="a")
tbl.remove(["abc"])
assert tbl.view().to_records() == []
# assert tbl.size() == 0

def test_remove_nonsequential(self):
tbl = Table([{"a": "abc", "b": 123}, {"a": "def", "b": 456}, {"a": "efg", "b": 789}], index="a")
tbl = Table(
[{"a": "abc", "b": 123}, {"a": "def", "b": 456}, {"a": "efg", "b": 789}],
index="a",
)
tbl.remove(["abc", "efg"])
assert tbl.view().to_records() == [{"a": "def", "b": 456}]
# assert tbl.size() == 1
Expand All @@ -35,3 +37,63 @@ def test_remove_multiple_single(self):
tbl.remove([i])
assert tbl.view().to_records() == [{"a": 0, "b": "0"}]
# assert tbl.size() == 0

def test_remove_expressions(self):
schema = {"key": str, "delta$": float, "business_line": str}
data = [
{
"key": "A",
"delta$": 46412.3804275,
},
{
"key": "B",
"delta$": 2317615.875,
},
]

table = Table(schema, index="key")
table.update(data)
table.remove(["A"])
view = table.view(
group_by=["business_line"],
columns=["delta$", "alias"],
expressions=[
'// alias\n"delta$"',
],
)

records = view.to_records()
assert records == [
{"__ROW_PATH__": [], "delta$": 2317615.875, "alias": 2317615.875},
{"__ROW_PATH__": [None], "delta$": 2317615.875, "alias": 2317615.875},
]

def test_remove_expressions_after_view(self):
schema = {"key": str, "delta$": float, "business_line": str}
data = [
{
"key": "A",
"delta$": 46412.3804275,
},
{
"key": "B",
"delta$": 2317615.875,
},
]

table = Table(schema, index="key")
table.update(data)
view = table.view(
group_by=["business_line"],
columns=["delta$", "alias"],
expressions=[
'// alias\n"delta$"',
],
)

table.remove(["A"])
records = view.to_records()
assert records == [
{"__ROW_PATH__": [], "delta$": 2317615.875, "alias": 2317615.875},
{"__ROW_PATH__": [None], "delta$": 2317615.875, "alias": 2317615.875},
]