Skip to content

Commit

Permalink
Fix incorrect result in lazy vector (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo authored Aug 11, 2022
1 parent 28b1d94 commit e63550c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 17 deletions.
13 changes: 0 additions & 13 deletions velox/exec/FilterProject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,19 +159,6 @@ RowVectorPtr FilterProject::getOutput() {
}

void FilterProject::project(const SelectivityVector& rows, EvalCtx* evalCtx) {
// Make sure LazyVectors are loaded for all the "rows".
//
// Consider projection with 2 expressions: f(a) AND g(b), h(b)
// If b is a LazyVector and f(a) AND g(b) expression is evaluated first, it
// will load b only for rows where f(a) is true. However, h(b) projection
// needs all rows for "b".
//
// This works, but may load more rows than necessary. E.g. if we only have
// f(a) AND g(b) expression and b is not used anywhere else, it is sufficient
// to load b for a subset of rows where f(a) is true.
*evalCtx->mutableIsFinalSelection() = false;
*evalCtx->mutableFinalSelection() = &rows;

exprs_->eval(
hasFilter_ ? 1 : 0, numExprs_, !hasFilter_, rows, evalCtx, &results_);
}
Expand Down
27 changes: 23 additions & 4 deletions velox/expression/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,14 @@ bool isMember(
}

void mergeFields(
std::vector<FieldReference*>& fields,
std::vector<FieldReference*>& distinctFields,
std::unordered_set<FieldReference*>& multiplyReferencedFields_,
const std::vector<FieldReference*>& moreFields) {
for (auto* newField : moreFields) {
if (!isMember(fields, *newField)) {
fields.emplace_back(newField);
if (isMember(distinctFields, *newField)) {
multiplyReferencedFields_.insert(newField);
} else {
distinctFields.emplace_back(newField);
}
}
}
Expand Down Expand Up @@ -145,7 +148,8 @@ void Expr::computeMetadata() {
input->computeMetadata();
deterministic_ &= input->deterministic_;
propagatesNulls_ &= input->propagatesNulls_;
mergeFields(distinctFields_, input->distinctFields_);
mergeFields(
distinctFields_, multiplyReferencedFields_, input->distinctFields_);
}
if (isSpecialForm()) {
propagatesNulls_ = propagatesNulls();
Expand Down Expand Up @@ -287,6 +291,12 @@ void Expr::eval(
for (const auto& field : distinctFields_) {
context.ensureFieldLoaded(field->index(context), rows);
}
} else if (!propagatesNulls_) {
// Load multiply-referenced fields at common parent expr with "rows".
// Delay loading fields that are not in multiplyReferencedFields_.
for (const auto& field : multiplyReferencedFields_) {
context.ensureFieldLoaded(field->index(context), rows);
}
}

if (inputs_.empty()) {
Expand Down Expand Up @@ -1232,6 +1242,11 @@ ExprSet::ExprSet(
: execCtx_(execCtx) {
exprs_ = compileExpressions(
std::move(sources), execCtx, this, enableConstantFolding);
std::vector<FieldReference*> allDistinctFields;
for (auto& expr : exprs_) {
mergeFields(
allDistinctFields, multiplyReferencedFields_, expr->distinctFields());
}
}

namespace {
Expand Down Expand Up @@ -1306,6 +1321,9 @@ void ExprSet::eval(
if (initialize) {
clearSharedSubexprs();
}
for (const auto& field : multiplyReferencedFields_) {
context->ensureFieldLoaded(field->index(*context), rows);
}
for (int32_t i = begin; i < end; ++i) {
exprs_[i]->eval(rows, *context, (*result)[i]);
}
Expand All @@ -1322,6 +1340,7 @@ void ExprSet::clear() {
for (auto* memo : memoizingExprs_) {
memo->clearMemo();
}
multiplyReferencedFields_.clear();
}

void ExprSetSimplified::eval(
Expand Down
7 changes: 7 additions & 0 deletions velox/expression/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ class Expr {
// parent Expr.
std::vector<FieldReference * FOLLY_NONNULL> distinctFields_;

// Fields referenced by multiple inputs, which is subset of distinctFields_.
// Used to determine pre-loading of lazy vectors at current expr.
std::unordered_set<FieldReference * FOLLY_NONNULL> multiplyReferencedFields_;

// True if a null in any of 'distinctFields_' causes 'this' to be
// null for the row.
bool propagatesNulls_ = false;
Expand Down Expand Up @@ -429,6 +433,9 @@ class ExprSet {

std::vector<std::shared_ptr<Expr>> exprs_;

// Fields referenced by multiple expressions in ExprSet.
std::unordered_set<FieldReference * FOLLY_NONNULL> multiplyReferencedFields_;

// Distinct Exprs reachable from 'exprs_' for which reset() needs to
// be called at the start of eval().
std::vector<std::shared_ptr<Expr>> toReset_;
Expand Down

0 comments on commit e63550c

Please sign in to comment.